Re: [Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu v2

2012-10-30 Thread Marek Olšák
This looks good to me. Thank you.

Marek

On Tue, Oct 30, 2012 at 11:04 PM,   wrote:
> From: Jerome Glisse 
>
> On r6xx/r7xx shader resource management need to make sure that the
> shader does not goes over the gpr register limit. Each specific
> asic has a maxmimum register that can be split btw shader stage.
> For each stage the shader must not use more register than the
> limit programmed.
>
> v2: Print an error message when discarding draw. Don't add another
> boolean to context structure, but rather propagate the discard
> boolean through the call chain.
>
> Signed-off-by: Jerome Glisse 
> ---
>  src/gallium/drivers/r600/r600_pipe.h |  2 +-
>  src/gallium/drivers/r600/r600_state.c| 67 
> +++-
>  src/gallium/drivers/r600/r600_state_common.c | 27 ++-
>  3 files changed, 62 insertions(+), 34 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_pipe.h 
> b/src/gallium/drivers/r600/r600_pipe.h
> index ff2a5fd..3edef40 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -595,7 +595,7 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx);
>  void *r600_create_resolve_blend(struct r600_context *rctx);
>  void *r700_create_resolve_blend(struct r600_context *rctx);
>  void *r600_create_decompress_blend(struct r600_context *rctx);
> -void r600_adjust_gprs(struct r600_context *rctx);
> +bool r600_adjust_gprs(struct r600_context *rctx);
>  boolean r600_is_format_supported(struct pipe_screen *screen,
>  enum pipe_format format,
>  enum pipe_texture_target target,
> diff --git a/src/gallium/drivers/r600/r600_state.c 
> b/src/gallium/drivers/r600/r600_state.c
> index 7d07008..76fe44d 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -2187,36 +2187,61 @@ void r600_init_state_functions(struct r600_context 
> *rctx)
>  }
>
>  /* Adjust GPR allocation on R6xx/R7xx */
> -void r600_adjust_gprs(struct r600_context *rctx)
> +bool r600_adjust_gprs(struct r600_context *rctx)
>  {
> -   unsigned num_ps_gprs = rctx->default_ps_gprs;
> -   unsigned num_vs_gprs = rctx->default_vs_gprs;
> +   unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr;
> +   unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr;
> +   unsigned new_num_ps_gprs = num_ps_gprs;
> +   unsigned new_num_vs_gprs = num_vs_gprs;
> +   unsigned cur_num_ps_gprs = 
> G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> +   unsigned cur_num_vs_gprs = 
> G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> +   unsigned def_num_ps_gprs = rctx->default_ps_gprs;
> +   unsigned def_num_vs_gprs = rctx->default_vs_gprs;
> +   unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
> +   /* hardware will reserve twice num_clause_temp_gprs */
> +   unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + 
> def_num_clause_temp_gprs * 2;
> unsigned tmp;
> -   int diff;
>
> -   if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) 
> {
> -   diff = rctx->ps_shader->current->shader.bc.ngpr - 
> rctx->default_ps_gprs;
> -   num_vs_gprs -= diff;
> -   num_ps_gprs += diff;
> -   }
> -
> -   if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs)
> -   {
> -   diff = rctx->vs_shader->current->shader.bc.ngpr - 
> rctx->default_vs_gprs;
> -   num_ps_gprs -= diff;
> -   num_vs_gprs += diff;
> +   /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to 
> max_gprs */
> +   if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > 
> cur_num_vs_gprs) {
> +   /* try to use switch back to default */
> +   if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > 
> def_num_vs_gprs) {
> +   /* always privilege vs stage so that at worst we have 
> the
> +* pixel stage producing wrong output (not the vertex
> +* stage) */
> +   new_num_ps_gprs = max_gprs - (new_num_vs_gprs + 
> def_num_clause_temp_gprs * 2);
> +   new_num_vs_gprs = num_vs_gprs;
> +   } else {
> +   new_num_ps_gprs = def_num_ps_gprs;
> +   new_num_vs_gprs = def_num_vs_gprs;
> +   }
> +   } else {
> +   return true;
> }
>
> -   tmp = 0;
> -   tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
> -   tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
> -   tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs);
> -
> -   if (tmp != rctx->config_state.sq_gpr_resource_mgmt_1) {
> +   /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <=
> +* SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the 

[Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu v2

2012-10-30 Thread j . glisse
From: Jerome Glisse 

On r6xx/r7xx shader resource management need to make sure that the
shader does not goes over the gpr register limit. Each specific
asic has a maxmimum register that can be split btw shader stage.
For each stage the shader must not use more register than the
limit programmed.

v2: Print an error message when discarding draw. Don't add another
boolean to context structure, but rather propagate the discard
boolean through the call chain.

Signed-off-by: Jerome Glisse 
---
 src/gallium/drivers/r600/r600_pipe.h |  2 +-
 src/gallium/drivers/r600/r600_state.c| 67 +++-
 src/gallium/drivers/r600/r600_state_common.c | 27 ++-
 3 files changed, 62 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index ff2a5fd..3edef40 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -595,7 +595,7 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx);
 void *r600_create_resolve_blend(struct r600_context *rctx);
 void *r700_create_resolve_blend(struct r600_context *rctx);
 void *r600_create_decompress_blend(struct r600_context *rctx);
-void r600_adjust_gprs(struct r600_context *rctx);
+bool r600_adjust_gprs(struct r600_context *rctx);
 boolean r600_is_format_supported(struct pipe_screen *screen,
 enum pipe_format format,
 enum pipe_texture_target target,
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 7d07008..76fe44d 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2187,36 +2187,61 @@ void r600_init_state_functions(struct r600_context 
*rctx)
 }
 
 /* Adjust GPR allocation on R6xx/R7xx */
-void r600_adjust_gprs(struct r600_context *rctx)
+bool r600_adjust_gprs(struct r600_context *rctx)
 {
-   unsigned num_ps_gprs = rctx->default_ps_gprs;
-   unsigned num_vs_gprs = rctx->default_vs_gprs;
+   unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr;
+   unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr;
+   unsigned new_num_ps_gprs = num_ps_gprs;
+   unsigned new_num_vs_gprs = num_vs_gprs;
+   unsigned cur_num_ps_gprs = 
G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+   unsigned cur_num_vs_gprs = 
G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+   unsigned def_num_ps_gprs = rctx->default_ps_gprs;
+   unsigned def_num_vs_gprs = rctx->default_vs_gprs;
+   unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
+   /* hardware will reserve twice num_clause_temp_gprs */
+   unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + 
def_num_clause_temp_gprs * 2;
unsigned tmp;
-   int diff;
 
-   if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) {
-   diff = rctx->ps_shader->current->shader.bc.ngpr - 
rctx->default_ps_gprs;
-   num_vs_gprs -= diff;
-   num_ps_gprs += diff;
-   }
-
-   if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs)
-   {
-   diff = rctx->vs_shader->current->shader.bc.ngpr - 
rctx->default_vs_gprs;
-   num_ps_gprs -= diff;
-   num_vs_gprs += diff;
+   /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to max_gprs 
*/
+   if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > 
cur_num_vs_gprs) {
+   /* try to use switch back to default */
+   if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > 
def_num_vs_gprs) {
+   /* always privilege vs stage so that at worst we have 
the
+* pixel stage producing wrong output (not the vertex
+* stage) */
+   new_num_ps_gprs = max_gprs - (new_num_vs_gprs + 
def_num_clause_temp_gprs * 2);
+   new_num_vs_gprs = num_vs_gprs;
+   } else {
+   new_num_ps_gprs = def_num_ps_gprs;
+   new_num_vs_gprs = def_num_vs_gprs;
+   }
+   } else {
+   return true;
}
 
-   tmp = 0;
-   tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
-   tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
-   tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs);
-
-   if (tmp != rctx->config_state.sq_gpr_resource_mgmt_1) {
+   /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <=
+* SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup
+* Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS
+* it will lockup. So in this case just discard the draw command
+* and don't change the current gprs repartitions.
+*/
+   if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs >

Re: [Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu

2012-10-27 Thread Alex Deucher
On Fri, Oct 26, 2012 at 10:01 PM,   wrote:
> From: Jerome Glisse 
>
> On r6xx/r7xx shader resource management need to make sure that the
> shader does not goes over the gpr register limit. Each specific
> asic has a maxmimum register that can be split btw shader stage.
> For each stage the shader must not use more register than the
> limit programmed.

We may also want to add a new parameter to the radeon info ioctl to
fetch the GPR limit for each asic.

Alex

>
> Signed-off-by: Jerome Glisse 
> ---
>  src/gallium/drivers/r600/r600_pipe.h |  1 +
>  src/gallium/drivers/r600/r600_state.c| 60 
> +++-
>  src/gallium/drivers/r600/r600_state_common.c | 22 +-
>  3 files changed, 55 insertions(+), 28 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_pipe.h 
> b/src/gallium/drivers/r600/r600_pipe.h
> index ff2a5fd..2045af3 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -363,6 +363,7 @@ struct r600_context {
> enum chip_class chip_class;
> boolean has_vertex_cache;
> boolean keep_tiling_flags;
> +   booldiscard_draw;
> unsigneddefault_ps_gprs, default_vs_gprs;
> unsignedr6xx_num_clause_temp_gprs;
> unsignedbackend_mask;
> diff --git a/src/gallium/drivers/r600/r600_state.c 
> b/src/gallium/drivers/r600/r600_state.c
> index 7d07008..43af934 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -2189,30 +2189,54 @@ void r600_init_state_functions(struct r600_context 
> *rctx)
>  /* Adjust GPR allocation on R6xx/R7xx */
>  void r600_adjust_gprs(struct r600_context *rctx)
>  {
> -   unsigned num_ps_gprs = rctx->default_ps_gprs;
> -   unsigned num_vs_gprs = rctx->default_vs_gprs;
> +   unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr;
> +   unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr;
> +   unsigned new_num_ps_gprs = num_ps_gprs;
> +   unsigned new_num_vs_gprs = num_vs_gprs;
> +   unsigned cur_num_ps_gprs = 
> G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> +   unsigned cur_num_vs_gprs = 
> G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> +   unsigned def_num_ps_gprs = rctx->default_ps_gprs;
> +   unsigned def_num_vs_gprs = rctx->default_vs_gprs;
> +   unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
> +   /* hardware will reserve twice num_clause_temp_gprs */
> +   unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + 
> def_num_clause_temp_gprs * 2;
> unsigned tmp;
> -   int diff;
>
> -   if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) 
> {
> -   diff = rctx->ps_shader->current->shader.bc.ngpr - 
> rctx->default_ps_gprs;
> -   num_vs_gprs -= diff;
> -   num_ps_gprs += diff;
> +   /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to 
> max_gprs */
> +   if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > 
> cur_num_vs_gprs) {
> +   /* try to use switch back to default */
> +   if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > 
> def_num_vs_gprs) {
> +   /* always privilege vs stage so that at worst we have 
> the
> +* pixel stage producing wrong output (not the vertex
> +* stage) */
> +   new_num_ps_gprs = max_gprs - (new_num_vs_gprs + 
> def_num_clause_temp_gprs * 2);
> +   new_num_vs_gprs = num_vs_gprs;
> +   } else {
> +   new_num_ps_gprs = def_num_ps_gprs;
> +   new_num_vs_gprs = def_num_vs_gprs;
> +   }
> +   } else {
> +   rctx->discard_draw = false;
> +   return;
> }
>
> -   if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs)
> -   {
> -   diff = rctx->vs_shader->current->shader.bc.ngpr - 
> rctx->default_vs_gprs;
> -   num_ps_gprs -= diff;
> -   num_vs_gprs += diff;
> +   /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <=
> +* SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup
> +* Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS
> +* it will lockup. So in this case just discard the draw command
> +* and don't change the current gprs repartitions.
> +*/
> +   rctx->discard_draw = false;
> +   if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs) {
> +   rctx->discard_draw = true;
> +   return;
> }
>
> -   tmp = 0;
> -   tmp |= S_008C04_NUM_PS_GPRS(num_ps_

Re: [Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu

2012-10-27 Thread Marek Olšák
FWIW, instead of putting the discard_draw flag in r600_context, it
would be cleaner to have r600_adjust_gprs return false if drawing
should be skipped, then r600_update_derived_state would return false
and draw_vbo would skip rendering. That way you wouldn't have to add
any comments in draw_vbo, because it would be clear where the error is
coming from.

Using R600_ERR to report the error should be sufficient.
r600_adjust_gprs seems to be the best place for that.

Marek

On Sat, Oct 27, 2012 at 3:47 AM, Jerome Glisse  wrote:
> On Fri, Oct 26, 2012 at 10:01 PM,   wrote:
>> From: Jerome Glisse 
>>
>> On r6xx/r7xx shader resource management need to make sure that the
>> shader does not goes over the gpr register limit. Each specific
>> asic has a maxmimum register that can be split btw shader stage.
>> For each stage the shader must not use more register than the
>> limit programmed.
>>
>> Signed-off-by: Jerome Glisse 
>
> I haven't yet fully tested it on wide range of GPU but it fixes piglit
> case that were locking up o one can directly use quick-drivers. I
> mostly would like feedback on if we should print a warning when we
> discard a draw command because shader exceed limit.
>
> Note that with this patch the test that were locking up fails but with
> a simple patch on top of that (decreasing clause temp gpr to 2) they
> pass.
>
> Regards,
> Jerome
>
>> ---
>>  src/gallium/drivers/r600/r600_pipe.h |  1 +
>>  src/gallium/drivers/r600/r600_state.c| 60 
>> +++-
>>  src/gallium/drivers/r600/r600_state_common.c | 22 +-
>>  3 files changed, 55 insertions(+), 28 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r600/r600_pipe.h 
>> b/src/gallium/drivers/r600/r600_pipe.h
>> index ff2a5fd..2045af3 100644
>> --- a/src/gallium/drivers/r600/r600_pipe.h
>> +++ b/src/gallium/drivers/r600/r600_pipe.h
>> @@ -363,6 +363,7 @@ struct r600_context {
>> enum chip_class chip_class;
>> boolean has_vertex_cache;
>> boolean keep_tiling_flags;
>> +   booldiscard_draw;
>> unsigneddefault_ps_gprs, default_vs_gprs;
>> unsignedr6xx_num_clause_temp_gprs;
>> unsignedbackend_mask;
>> diff --git a/src/gallium/drivers/r600/r600_state.c 
>> b/src/gallium/drivers/r600/r600_state.c
>> index 7d07008..43af934 100644
>> --- a/src/gallium/drivers/r600/r600_state.c
>> +++ b/src/gallium/drivers/r600/r600_state.c
>> @@ -2189,30 +2189,54 @@ void r600_init_state_functions(struct r600_context 
>> *rctx)
>>  /* Adjust GPR allocation on R6xx/R7xx */
>>  void r600_adjust_gprs(struct r600_context *rctx)
>>  {
>> -   unsigned num_ps_gprs = rctx->default_ps_gprs;
>> -   unsigned num_vs_gprs = rctx->default_vs_gprs;
>> +   unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr;
>> +   unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr;
>> +   unsigned new_num_ps_gprs = num_ps_gprs;
>> +   unsigned new_num_vs_gprs = num_vs_gprs;
>> +   unsigned cur_num_ps_gprs = 
>> G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
>> +   unsigned cur_num_vs_gprs = 
>> G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
>> +   unsigned def_num_ps_gprs = rctx->default_ps_gprs;
>> +   unsigned def_num_vs_gprs = rctx->default_vs_gprs;
>> +   unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
>> +   /* hardware will reserve twice num_clause_temp_gprs */
>> +   unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + 
>> def_num_clause_temp_gprs * 2;
>> unsigned tmp;
>> -   int diff;
>>
>> -   if (rctx->ps_shader->current->shader.bc.ngpr > 
>> rctx->default_ps_gprs) {
>> -   diff = rctx->ps_shader->current->shader.bc.ngpr - 
>> rctx->default_ps_gprs;
>> -   num_vs_gprs -= diff;
>> -   num_ps_gprs += diff;
>> +   /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to 
>> max_gprs */
>> +   if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > 
>> cur_num_vs_gprs) {
>> +   /* try to use switch back to default */
>> +   if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > 
>> def_num_vs_gprs) {
>> +   /* always privilege vs stage so that at worst we 
>> have the
>> +* pixel stage producing wrong output (not the vertex
>> +* stage) */
>> +   new_num_ps_gprs = max_gprs - (new_num_vs_gprs + 
>> def_num_clause_temp_gprs * 2);
>> +   new_num_vs_gprs = num_vs_gprs;
>> +   } else {
>> +   new_num_ps_gprs = def_num_ps_gprs;
>> +   new_num_vs_gprs = def_num_vs_gprs;
>> +   }
>> +   } else {
>> +   rctx->discard_draw = false;
>> +   

Re: [Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu

2012-10-26 Thread Jerome Glisse
On Fri, Oct 26, 2012 at 10:01 PM,   wrote:
> From: Jerome Glisse 
>
> On r6xx/r7xx shader resource management need to make sure that the
> shader does not goes over the gpr register limit. Each specific
> asic has a maxmimum register that can be split btw shader stage.
> For each stage the shader must not use more register than the
> limit programmed.
>
> Signed-off-by: Jerome Glisse 

I haven't yet fully tested it on wide range of GPU but it fixes piglit
case that were locking up o one can directly use quick-drivers. I
mostly would like feedback on if we should print a warning when we
discard a draw command because shader exceed limit.

Note that with this patch the test that were locking up fails but with
a simple patch on top of that (decreasing clause temp gpr to 2) they
pass.

Regards,
Jerome

> ---
>  src/gallium/drivers/r600/r600_pipe.h |  1 +
>  src/gallium/drivers/r600/r600_state.c| 60 
> +++-
>  src/gallium/drivers/r600/r600_state_common.c | 22 +-
>  3 files changed, 55 insertions(+), 28 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_pipe.h 
> b/src/gallium/drivers/r600/r600_pipe.h
> index ff2a5fd..2045af3 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -363,6 +363,7 @@ struct r600_context {
> enum chip_class chip_class;
> boolean has_vertex_cache;
> boolean keep_tiling_flags;
> +   booldiscard_draw;
> unsigneddefault_ps_gprs, default_vs_gprs;
> unsignedr6xx_num_clause_temp_gprs;
> unsignedbackend_mask;
> diff --git a/src/gallium/drivers/r600/r600_state.c 
> b/src/gallium/drivers/r600/r600_state.c
> index 7d07008..43af934 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -2189,30 +2189,54 @@ void r600_init_state_functions(struct r600_context 
> *rctx)
>  /* Adjust GPR allocation on R6xx/R7xx */
>  void r600_adjust_gprs(struct r600_context *rctx)
>  {
> -   unsigned num_ps_gprs = rctx->default_ps_gprs;
> -   unsigned num_vs_gprs = rctx->default_vs_gprs;
> +   unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr;
> +   unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr;
> +   unsigned new_num_ps_gprs = num_ps_gprs;
> +   unsigned new_num_vs_gprs = num_vs_gprs;
> +   unsigned cur_num_ps_gprs = 
> G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> +   unsigned cur_num_vs_gprs = 
> G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
> +   unsigned def_num_ps_gprs = rctx->default_ps_gprs;
> +   unsigned def_num_vs_gprs = rctx->default_vs_gprs;
> +   unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
> +   /* hardware will reserve twice num_clause_temp_gprs */
> +   unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + 
> def_num_clause_temp_gprs * 2;
> unsigned tmp;
> -   int diff;
>
> -   if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) 
> {
> -   diff = rctx->ps_shader->current->shader.bc.ngpr - 
> rctx->default_ps_gprs;
> -   num_vs_gprs -= diff;
> -   num_ps_gprs += diff;
> +   /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to 
> max_gprs */
> +   if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > 
> cur_num_vs_gprs) {
> +   /* try to use switch back to default */
> +   if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > 
> def_num_vs_gprs) {
> +   /* always privilege vs stage so that at worst we have 
> the
> +* pixel stage producing wrong output (not the vertex
> +* stage) */
> +   new_num_ps_gprs = max_gprs - (new_num_vs_gprs + 
> def_num_clause_temp_gprs * 2);
> +   new_num_vs_gprs = num_vs_gprs;
> +   } else {
> +   new_num_ps_gprs = def_num_ps_gprs;
> +   new_num_vs_gprs = def_num_vs_gprs;
> +   }
> +   } else {
> +   rctx->discard_draw = false;
> +   return;
> }
>
> -   if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs)
> -   {
> -   diff = rctx->vs_shader->current->shader.bc.ngpr - 
> rctx->default_vs_gprs;
> -   num_ps_gprs -= diff;
> -   num_vs_gprs += diff;
> +   /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <=
> +* SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup
> +* Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS
> +* it will lockup. So in this case just discard the draw command
> +* and don't change t

[Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu

2012-10-26 Thread j . glisse
From: Jerome Glisse 

On r6xx/r7xx shader resource management need to make sure that the
shader does not goes over the gpr register limit. Each specific
asic has a maxmimum register that can be split btw shader stage.
For each stage the shader must not use more register than the
limit programmed.

Signed-off-by: Jerome Glisse 
---
 src/gallium/drivers/r600/r600_pipe.h |  1 +
 src/gallium/drivers/r600/r600_state.c| 60 +++-
 src/gallium/drivers/r600/r600_state_common.c | 22 +-
 3 files changed, 55 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index ff2a5fd..2045af3 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -363,6 +363,7 @@ struct r600_context {
enum chip_class chip_class;
boolean has_vertex_cache;
boolean keep_tiling_flags;
+   booldiscard_draw;
unsigneddefault_ps_gprs, default_vs_gprs;
unsignedr6xx_num_clause_temp_gprs;
unsignedbackend_mask;
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 7d07008..43af934 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2189,30 +2189,54 @@ void r600_init_state_functions(struct r600_context 
*rctx)
 /* Adjust GPR allocation on R6xx/R7xx */
 void r600_adjust_gprs(struct r600_context *rctx)
 {
-   unsigned num_ps_gprs = rctx->default_ps_gprs;
-   unsigned num_vs_gprs = rctx->default_vs_gprs;
+   unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr;
+   unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr;
+   unsigned new_num_ps_gprs = num_ps_gprs;
+   unsigned new_num_vs_gprs = num_vs_gprs;
+   unsigned cur_num_ps_gprs = 
G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+   unsigned cur_num_vs_gprs = 
G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+   unsigned def_num_ps_gprs = rctx->default_ps_gprs;
+   unsigned def_num_vs_gprs = rctx->default_vs_gprs;
+   unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
+   /* hardware will reserve twice num_clause_temp_gprs */
+   unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + 
def_num_clause_temp_gprs * 2;
unsigned tmp;
-   int diff;
 
-   if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) {
-   diff = rctx->ps_shader->current->shader.bc.ngpr - 
rctx->default_ps_gprs;
-   num_vs_gprs -= diff;
-   num_ps_gprs += diff;
+   /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to max_gprs 
*/
+   if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > 
cur_num_vs_gprs) {
+   /* try to use switch back to default */
+   if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > 
def_num_vs_gprs) {
+   /* always privilege vs stage so that at worst we have 
the
+* pixel stage producing wrong output (not the vertex
+* stage) */
+   new_num_ps_gprs = max_gprs - (new_num_vs_gprs + 
def_num_clause_temp_gprs * 2);
+   new_num_vs_gprs = num_vs_gprs;
+   } else {
+   new_num_ps_gprs = def_num_ps_gprs;
+   new_num_vs_gprs = def_num_vs_gprs;
+   }
+   } else {
+   rctx->discard_draw = false;
+   return;
}
 
-   if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs)
-   {
-   diff = rctx->vs_shader->current->shader.bc.ngpr - 
rctx->default_vs_gprs;
-   num_ps_gprs -= diff;
-   num_vs_gprs += diff;
+   /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <=
+* SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup
+* Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS
+* it will lockup. So in this case just discard the draw command
+* and don't change the current gprs repartitions.
+*/
+   rctx->discard_draw = false;
+   if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs) {
+   rctx->discard_draw = true;
+   return;
}
 
-   tmp = 0;
-   tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
-   tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
-   tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs);
-
-   if (tmp != rctx->config_state.sq_gpr_resource_mgmt_1) {
+   /* in some case we endup recomputing the current value */
+   tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) |
+   S_008C04_NUM_VS_GPRS(ne