Re: [Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu v2
This looks good to me. Thank you. Marek On Tue, Oct 30, 2012 at 11:04 PM, wrote: > From: Jerome Glisse > > On r6xx/r7xx shader resource management need to make sure that the > shader does not goes over the gpr register limit. Each specific > asic has a maxmimum register that can be split btw shader stage. > For each stage the shader must not use more register than the > limit programmed. > > v2: Print an error message when discarding draw. Don't add another > boolean to context structure, but rather propagate the discard > boolean through the call chain. > > Signed-off-by: Jerome Glisse > --- > src/gallium/drivers/r600/r600_pipe.h | 2 +- > src/gallium/drivers/r600/r600_state.c| 67 > +++- > src/gallium/drivers/r600/r600_state_common.c | 27 ++- > 3 files changed, 62 insertions(+), 34 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_pipe.h > b/src/gallium/drivers/r600/r600_pipe.h > index ff2a5fd..3edef40 100644 > --- a/src/gallium/drivers/r600/r600_pipe.h > +++ b/src/gallium/drivers/r600/r600_pipe.h > @@ -595,7 +595,7 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx); > void *r600_create_resolve_blend(struct r600_context *rctx); > void *r700_create_resolve_blend(struct r600_context *rctx); > void *r600_create_decompress_blend(struct r600_context *rctx); > -void r600_adjust_gprs(struct r600_context *rctx); > +bool r600_adjust_gprs(struct r600_context *rctx); > boolean r600_is_format_supported(struct pipe_screen *screen, > enum pipe_format format, > enum pipe_texture_target target, > diff --git a/src/gallium/drivers/r600/r600_state.c > b/src/gallium/drivers/r600/r600_state.c > index 7d07008..76fe44d 100644 > --- a/src/gallium/drivers/r600/r600_state.c > +++ b/src/gallium/drivers/r600/r600_state.c > @@ -2187,36 +2187,61 @@ void r600_init_state_functions(struct r600_context > *rctx) > } > > /* Adjust GPR allocation on R6xx/R7xx */ > -void r600_adjust_gprs(struct r600_context *rctx) > +bool r600_adjust_gprs(struct r600_context *rctx) > { > - unsigned num_ps_gprs = rctx->default_ps_gprs; > - unsigned num_vs_gprs = rctx->default_vs_gprs; > + unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr; > + unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr; > + unsigned new_num_ps_gprs = num_ps_gprs; > + unsigned new_num_vs_gprs = num_vs_gprs; > + unsigned cur_num_ps_gprs = > G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > + unsigned cur_num_vs_gprs = > G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > + unsigned def_num_ps_gprs = rctx->default_ps_gprs; > + unsigned def_num_vs_gprs = rctx->default_vs_gprs; > + unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; > + /* hardware will reserve twice num_clause_temp_gprs */ > + unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + > def_num_clause_temp_gprs * 2; > unsigned tmp; > - int diff; > > - if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) > { > - diff = rctx->ps_shader->current->shader.bc.ngpr - > rctx->default_ps_gprs; > - num_vs_gprs -= diff; > - num_ps_gprs += diff; > - } > - > - if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs) > - { > - diff = rctx->vs_shader->current->shader.bc.ngpr - > rctx->default_vs_gprs; > - num_ps_gprs -= diff; > - num_vs_gprs += diff; > + /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to > max_gprs */ > + if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > > cur_num_vs_gprs) { > + /* try to use switch back to default */ > + if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > > def_num_vs_gprs) { > + /* always privilege vs stage so that at worst we have > the > +* pixel stage producing wrong output (not the vertex > +* stage) */ > + new_num_ps_gprs = max_gprs - (new_num_vs_gprs + > def_num_clause_temp_gprs * 2); > + new_num_vs_gprs = num_vs_gprs; > + } else { > + new_num_ps_gprs = def_num_ps_gprs; > + new_num_vs_gprs = def_num_vs_gprs; > + } > + } else { > + return true; > } > > - tmp = 0; > - tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); > - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); > - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs); > - > - if (tmp != rctx->config_state.sq_gpr_resource_mgmt_1) { > + /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <= > +* SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the
[Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu v2
From: Jerome Glisse On r6xx/r7xx shader resource management need to make sure that the shader does not goes over the gpr register limit. Each specific asic has a maxmimum register that can be split btw shader stage. For each stage the shader must not use more register than the limit programmed. v2: Print an error message when discarding draw. Don't add another boolean to context structure, but rather propagate the discard boolean through the call chain. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_pipe.h | 2 +- src/gallium/drivers/r600/r600_state.c| 67 +++- src/gallium/drivers/r600/r600_state_common.c | 27 ++- 3 files changed, 62 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index ff2a5fd..3edef40 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -595,7 +595,7 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx); void *r600_create_resolve_blend(struct r600_context *rctx); void *r700_create_resolve_blend(struct r600_context *rctx); void *r600_create_decompress_blend(struct r600_context *rctx); -void r600_adjust_gprs(struct r600_context *rctx); +bool r600_adjust_gprs(struct r600_context *rctx); boolean r600_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7d07008..76fe44d 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2187,36 +2187,61 @@ void r600_init_state_functions(struct r600_context *rctx) } /* Adjust GPR allocation on R6xx/R7xx */ -void r600_adjust_gprs(struct r600_context *rctx) +bool r600_adjust_gprs(struct r600_context *rctx) { - unsigned num_ps_gprs = rctx->default_ps_gprs; - unsigned num_vs_gprs = rctx->default_vs_gprs; + unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr; + unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr; + unsigned new_num_ps_gprs = num_ps_gprs; + unsigned new_num_vs_gprs = num_vs_gprs; + unsigned cur_num_ps_gprs = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); + unsigned cur_num_vs_gprs = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); + unsigned def_num_ps_gprs = rctx->default_ps_gprs; + unsigned def_num_vs_gprs = rctx->default_vs_gprs; + unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; + /* hardware will reserve twice num_clause_temp_gprs */ + unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + def_num_clause_temp_gprs * 2; unsigned tmp; - int diff; - if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) { - diff = rctx->ps_shader->current->shader.bc.ngpr - rctx->default_ps_gprs; - num_vs_gprs -= diff; - num_ps_gprs += diff; - } - - if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs) - { - diff = rctx->vs_shader->current->shader.bc.ngpr - rctx->default_vs_gprs; - num_ps_gprs -= diff; - num_vs_gprs += diff; + /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to max_gprs */ + if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > cur_num_vs_gprs) { + /* try to use switch back to default */ + if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > def_num_vs_gprs) { + /* always privilege vs stage so that at worst we have the +* pixel stage producing wrong output (not the vertex +* stage) */ + new_num_ps_gprs = max_gprs - (new_num_vs_gprs + def_num_clause_temp_gprs * 2); + new_num_vs_gprs = num_vs_gprs; + } else { + new_num_ps_gprs = def_num_ps_gprs; + new_num_vs_gprs = def_num_vs_gprs; + } + } else { + return true; } - tmp = 0; - tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs); - - if (tmp != rctx->config_state.sq_gpr_resource_mgmt_1) { + /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <= +* SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup +* Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS +* it will lockup. So in this case just discard the draw command +* and don't change the current gprs repartitions. +*/ + if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs >
Re: [Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu
On Fri, Oct 26, 2012 at 10:01 PM, wrote: > From: Jerome Glisse > > On r6xx/r7xx shader resource management need to make sure that the > shader does not goes over the gpr register limit. Each specific > asic has a maxmimum register that can be split btw shader stage. > For each stage the shader must not use more register than the > limit programmed. We may also want to add a new parameter to the radeon info ioctl to fetch the GPR limit for each asic. Alex > > Signed-off-by: Jerome Glisse > --- > src/gallium/drivers/r600/r600_pipe.h | 1 + > src/gallium/drivers/r600/r600_state.c| 60 > +++- > src/gallium/drivers/r600/r600_state_common.c | 22 +- > 3 files changed, 55 insertions(+), 28 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_pipe.h > b/src/gallium/drivers/r600/r600_pipe.h > index ff2a5fd..2045af3 100644 > --- a/src/gallium/drivers/r600/r600_pipe.h > +++ b/src/gallium/drivers/r600/r600_pipe.h > @@ -363,6 +363,7 @@ struct r600_context { > enum chip_class chip_class; > boolean has_vertex_cache; > boolean keep_tiling_flags; > + booldiscard_draw; > unsigneddefault_ps_gprs, default_vs_gprs; > unsignedr6xx_num_clause_temp_gprs; > unsignedbackend_mask; > diff --git a/src/gallium/drivers/r600/r600_state.c > b/src/gallium/drivers/r600/r600_state.c > index 7d07008..43af934 100644 > --- a/src/gallium/drivers/r600/r600_state.c > +++ b/src/gallium/drivers/r600/r600_state.c > @@ -2189,30 +2189,54 @@ void r600_init_state_functions(struct r600_context > *rctx) > /* Adjust GPR allocation on R6xx/R7xx */ > void r600_adjust_gprs(struct r600_context *rctx) > { > - unsigned num_ps_gprs = rctx->default_ps_gprs; > - unsigned num_vs_gprs = rctx->default_vs_gprs; > + unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr; > + unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr; > + unsigned new_num_ps_gprs = num_ps_gprs; > + unsigned new_num_vs_gprs = num_vs_gprs; > + unsigned cur_num_ps_gprs = > G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > + unsigned cur_num_vs_gprs = > G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > + unsigned def_num_ps_gprs = rctx->default_ps_gprs; > + unsigned def_num_vs_gprs = rctx->default_vs_gprs; > + unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; > + /* hardware will reserve twice num_clause_temp_gprs */ > + unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + > def_num_clause_temp_gprs * 2; > unsigned tmp; > - int diff; > > - if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) > { > - diff = rctx->ps_shader->current->shader.bc.ngpr - > rctx->default_ps_gprs; > - num_vs_gprs -= diff; > - num_ps_gprs += diff; > + /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to > max_gprs */ > + if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > > cur_num_vs_gprs) { > + /* try to use switch back to default */ > + if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > > def_num_vs_gprs) { > + /* always privilege vs stage so that at worst we have > the > +* pixel stage producing wrong output (not the vertex > +* stage) */ > + new_num_ps_gprs = max_gprs - (new_num_vs_gprs + > def_num_clause_temp_gprs * 2); > + new_num_vs_gprs = num_vs_gprs; > + } else { > + new_num_ps_gprs = def_num_ps_gprs; > + new_num_vs_gprs = def_num_vs_gprs; > + } > + } else { > + rctx->discard_draw = false; > + return; > } > > - if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs) > - { > - diff = rctx->vs_shader->current->shader.bc.ngpr - > rctx->default_vs_gprs; > - num_ps_gprs -= diff; > - num_vs_gprs += diff; > + /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <= > +* SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup > +* Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS > +* it will lockup. So in this case just discard the draw command > +* and don't change the current gprs repartitions. > +*/ > + rctx->discard_draw = false; > + if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs) { > + rctx->discard_draw = true; > + return; > } > > - tmp = 0; > - tmp |= S_008C04_NUM_PS_GPRS(num_ps_
Re: [Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu
FWIW, instead of putting the discard_draw flag in r600_context, it would be cleaner to have r600_adjust_gprs return false if drawing should be skipped, then r600_update_derived_state would return false and draw_vbo would skip rendering. That way you wouldn't have to add any comments in draw_vbo, because it would be clear where the error is coming from. Using R600_ERR to report the error should be sufficient. r600_adjust_gprs seems to be the best place for that. Marek On Sat, Oct 27, 2012 at 3:47 AM, Jerome Glisse wrote: > On Fri, Oct 26, 2012 at 10:01 PM, wrote: >> From: Jerome Glisse >> >> On r6xx/r7xx shader resource management need to make sure that the >> shader does not goes over the gpr register limit. Each specific >> asic has a maxmimum register that can be split btw shader stage. >> For each stage the shader must not use more register than the >> limit programmed. >> >> Signed-off-by: Jerome Glisse > > I haven't yet fully tested it on wide range of GPU but it fixes piglit > case that were locking up o one can directly use quick-drivers. I > mostly would like feedback on if we should print a warning when we > discard a draw command because shader exceed limit. > > Note that with this patch the test that were locking up fails but with > a simple patch on top of that (decreasing clause temp gpr to 2) they > pass. > > Regards, > Jerome > >> --- >> src/gallium/drivers/r600/r600_pipe.h | 1 + >> src/gallium/drivers/r600/r600_state.c| 60 >> +++- >> src/gallium/drivers/r600/r600_state_common.c | 22 +- >> 3 files changed, 55 insertions(+), 28 deletions(-) >> >> diff --git a/src/gallium/drivers/r600/r600_pipe.h >> b/src/gallium/drivers/r600/r600_pipe.h >> index ff2a5fd..2045af3 100644 >> --- a/src/gallium/drivers/r600/r600_pipe.h >> +++ b/src/gallium/drivers/r600/r600_pipe.h >> @@ -363,6 +363,7 @@ struct r600_context { >> enum chip_class chip_class; >> boolean has_vertex_cache; >> boolean keep_tiling_flags; >> + booldiscard_draw; >> unsigneddefault_ps_gprs, default_vs_gprs; >> unsignedr6xx_num_clause_temp_gprs; >> unsignedbackend_mask; >> diff --git a/src/gallium/drivers/r600/r600_state.c >> b/src/gallium/drivers/r600/r600_state.c >> index 7d07008..43af934 100644 >> --- a/src/gallium/drivers/r600/r600_state.c >> +++ b/src/gallium/drivers/r600/r600_state.c >> @@ -2189,30 +2189,54 @@ void r600_init_state_functions(struct r600_context >> *rctx) >> /* Adjust GPR allocation on R6xx/R7xx */ >> void r600_adjust_gprs(struct r600_context *rctx) >> { >> - unsigned num_ps_gprs = rctx->default_ps_gprs; >> - unsigned num_vs_gprs = rctx->default_vs_gprs; >> + unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr; >> + unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr; >> + unsigned new_num_ps_gprs = num_ps_gprs; >> + unsigned new_num_vs_gprs = num_vs_gprs; >> + unsigned cur_num_ps_gprs = >> G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); >> + unsigned cur_num_vs_gprs = >> G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); >> + unsigned def_num_ps_gprs = rctx->default_ps_gprs; >> + unsigned def_num_vs_gprs = rctx->default_vs_gprs; >> + unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; >> + /* hardware will reserve twice num_clause_temp_gprs */ >> + unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + >> def_num_clause_temp_gprs * 2; >> unsigned tmp; >> - int diff; >> >> - if (rctx->ps_shader->current->shader.bc.ngpr > >> rctx->default_ps_gprs) { >> - diff = rctx->ps_shader->current->shader.bc.ngpr - >> rctx->default_ps_gprs; >> - num_vs_gprs -= diff; >> - num_ps_gprs += diff; >> + /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to >> max_gprs */ >> + if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > >> cur_num_vs_gprs) { >> + /* try to use switch back to default */ >> + if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > >> def_num_vs_gprs) { >> + /* always privilege vs stage so that at worst we >> have the >> +* pixel stage producing wrong output (not the vertex >> +* stage) */ >> + new_num_ps_gprs = max_gprs - (new_num_vs_gprs + >> def_num_clause_temp_gprs * 2); >> + new_num_vs_gprs = num_vs_gprs; >> + } else { >> + new_num_ps_gprs = def_num_ps_gprs; >> + new_num_vs_gprs = def_num_vs_gprs; >> + } >> + } else { >> + rctx->discard_draw = false; >> +
Re: [Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu
On Fri, Oct 26, 2012 at 10:01 PM, wrote: > From: Jerome Glisse > > On r6xx/r7xx shader resource management need to make sure that the > shader does not goes over the gpr register limit. Each specific > asic has a maxmimum register that can be split btw shader stage. > For each stage the shader must not use more register than the > limit programmed. > > Signed-off-by: Jerome Glisse I haven't yet fully tested it on wide range of GPU but it fixes piglit case that were locking up o one can directly use quick-drivers. I mostly would like feedback on if we should print a warning when we discard a draw command because shader exceed limit. Note that with this patch the test that were locking up fails but with a simple patch on top of that (decreasing clause temp gpr to 2) they pass. Regards, Jerome > --- > src/gallium/drivers/r600/r600_pipe.h | 1 + > src/gallium/drivers/r600/r600_state.c| 60 > +++- > src/gallium/drivers/r600/r600_state_common.c | 22 +- > 3 files changed, 55 insertions(+), 28 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_pipe.h > b/src/gallium/drivers/r600/r600_pipe.h > index ff2a5fd..2045af3 100644 > --- a/src/gallium/drivers/r600/r600_pipe.h > +++ b/src/gallium/drivers/r600/r600_pipe.h > @@ -363,6 +363,7 @@ struct r600_context { > enum chip_class chip_class; > boolean has_vertex_cache; > boolean keep_tiling_flags; > + booldiscard_draw; > unsigneddefault_ps_gprs, default_vs_gprs; > unsignedr6xx_num_clause_temp_gprs; > unsignedbackend_mask; > diff --git a/src/gallium/drivers/r600/r600_state.c > b/src/gallium/drivers/r600/r600_state.c > index 7d07008..43af934 100644 > --- a/src/gallium/drivers/r600/r600_state.c > +++ b/src/gallium/drivers/r600/r600_state.c > @@ -2189,30 +2189,54 @@ void r600_init_state_functions(struct r600_context > *rctx) > /* Adjust GPR allocation on R6xx/R7xx */ > void r600_adjust_gprs(struct r600_context *rctx) > { > - unsigned num_ps_gprs = rctx->default_ps_gprs; > - unsigned num_vs_gprs = rctx->default_vs_gprs; > + unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr; > + unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr; > + unsigned new_num_ps_gprs = num_ps_gprs; > + unsigned new_num_vs_gprs = num_vs_gprs; > + unsigned cur_num_ps_gprs = > G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > + unsigned cur_num_vs_gprs = > G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); > + unsigned def_num_ps_gprs = rctx->default_ps_gprs; > + unsigned def_num_vs_gprs = rctx->default_vs_gprs; > + unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; > + /* hardware will reserve twice num_clause_temp_gprs */ > + unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + > def_num_clause_temp_gprs * 2; > unsigned tmp; > - int diff; > > - if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) > { > - diff = rctx->ps_shader->current->shader.bc.ngpr - > rctx->default_ps_gprs; > - num_vs_gprs -= diff; > - num_ps_gprs += diff; > + /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to > max_gprs */ > + if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > > cur_num_vs_gprs) { > + /* try to use switch back to default */ > + if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > > def_num_vs_gprs) { > + /* always privilege vs stage so that at worst we have > the > +* pixel stage producing wrong output (not the vertex > +* stage) */ > + new_num_ps_gprs = max_gprs - (new_num_vs_gprs + > def_num_clause_temp_gprs * 2); > + new_num_vs_gprs = num_vs_gprs; > + } else { > + new_num_ps_gprs = def_num_ps_gprs; > + new_num_vs_gprs = def_num_vs_gprs; > + } > + } else { > + rctx->discard_draw = false; > + return; > } > > - if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs) > - { > - diff = rctx->vs_shader->current->shader.bc.ngpr - > rctx->default_vs_gprs; > - num_ps_gprs -= diff; > - num_vs_gprs += diff; > + /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <= > +* SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup > +* Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS > +* it will lockup. So in this case just discard the draw command > +* and don't change t
[Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu
From: Jerome Glisse On r6xx/r7xx shader resource management need to make sure that the shader does not goes over the gpr register limit. Each specific asic has a maxmimum register that can be split btw shader stage. For each stage the shader must not use more register than the limit programmed. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_pipe.h | 1 + src/gallium/drivers/r600/r600_state.c| 60 +++- src/gallium/drivers/r600/r600_state_common.c | 22 +- 3 files changed, 55 insertions(+), 28 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index ff2a5fd..2045af3 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -363,6 +363,7 @@ struct r600_context { enum chip_class chip_class; boolean has_vertex_cache; boolean keep_tiling_flags; + booldiscard_draw; unsigneddefault_ps_gprs, default_vs_gprs; unsignedr6xx_num_clause_temp_gprs; unsignedbackend_mask; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7d07008..43af934 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2189,30 +2189,54 @@ void r600_init_state_functions(struct r600_context *rctx) /* Adjust GPR allocation on R6xx/R7xx */ void r600_adjust_gprs(struct r600_context *rctx) { - unsigned num_ps_gprs = rctx->default_ps_gprs; - unsigned num_vs_gprs = rctx->default_vs_gprs; + unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr; + unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr; + unsigned new_num_ps_gprs = num_ps_gprs; + unsigned new_num_vs_gprs = num_vs_gprs; + unsigned cur_num_ps_gprs = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); + unsigned cur_num_vs_gprs = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); + unsigned def_num_ps_gprs = rctx->default_ps_gprs; + unsigned def_num_vs_gprs = rctx->default_vs_gprs; + unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; + /* hardware will reserve twice num_clause_temp_gprs */ + unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + def_num_clause_temp_gprs * 2; unsigned tmp; - int diff; - if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) { - diff = rctx->ps_shader->current->shader.bc.ngpr - rctx->default_ps_gprs; - num_vs_gprs -= diff; - num_ps_gprs += diff; + /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to max_gprs */ + if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > cur_num_vs_gprs) { + /* try to use switch back to default */ + if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > def_num_vs_gprs) { + /* always privilege vs stage so that at worst we have the +* pixel stage producing wrong output (not the vertex +* stage) */ + new_num_ps_gprs = max_gprs - (new_num_vs_gprs + def_num_clause_temp_gprs * 2); + new_num_vs_gprs = num_vs_gprs; + } else { + new_num_ps_gprs = def_num_ps_gprs; + new_num_vs_gprs = def_num_vs_gprs; + } + } else { + rctx->discard_draw = false; + return; } - if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs) - { - diff = rctx->vs_shader->current->shader.bc.ngpr - rctx->default_vs_gprs; - num_ps_gprs -= diff; - num_vs_gprs += diff; + /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <= +* SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup +* Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS +* it will lockup. So in this case just discard the draw command +* and don't change the current gprs repartitions. +*/ + rctx->discard_draw = false; + if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs) { + rctx->discard_draw = true; + return; } - tmp = 0; - tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs); - - if (tmp != rctx->config_state.sq_gpr_resource_mgmt_1) { + /* in some case we endup recomputing the current value */ + tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) | + S_008C04_NUM_VS_GPRS(ne