Re: [Mesa-dev] [PATCH 3/9] nir/opt_peephole_select: Don't peephole_select expensive math instructions
On 10/10/18 10:31 am, Ian Romanick wrote: On 10/09/2018 03:42 PM, Timothy Arceri wrote: Hi Ian, You might be interested in doing something like this [1]. It needs some tidying up (and likely further testing), but my initial results showed that a fairly large number of selects can be converted to ifs just based on having a UBO load. [1] https://gitlab.freedesktop.org/tarceri/mesa/commits/bcsel-to-if So... this is basically doing the inverse of nir_opt_peephole_select if one of the bcsel choices is the only use of a UBO load? That is clever. :) Yep, it will also continue past the UBO load, gathering as many instructions as it can that are only used to produce the input of the bcsel. It could easily be expanded to cover your expensive math. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/9] nir/opt_peephole_select: Don't peephole_select expensive math instructions
On 10/09/2018 03:42 PM, Timothy Arceri wrote: > Hi Ian, > > You might be interested in doing something like this [1]. It needs some > tidy up (and likely further testing) but my initial results showed a > fairly large number of select can be converted to ifs just based on > having a ubo load. > > [1] https://gitlab.freedesktop.org/tarceri/mesa/commits/bcsel-to-if So... this is basically doing the inverse of nir_opt_peephole_select if one of the bcsel choices is the only use of a UBO load? That is clever. :) > On 30/8/18 3:35 pm, Ian Romanick wrote: >> From: Ian Romanick >> >> On some GPUs, especially older Intel GPUs, some math instructions are >> very expensive. On those architectures, don't reduce flow control to a >> csel if one of the branches contains one of these expensive math >> instructions. >> >> This prevents a bunch of cycle count regressions on pre-Gen6 platforms >> with a later patch (intel/compiler: More peephole select for pre-Gen6). >> >> Signed-off-by: Ian Romanick >> --- >> src/amd/vulkan/radv_shader.c | 2 +- >> src/broadcom/compiler/nir_to_vir.c | 2 +- >> src/compiler/nir/nir.h | 2 +- >> src/compiler/nir/nir_opt_peephole_select.c | 46 >> +++- >> src/gallium/drivers/freedreno/ir3/ir3_nir.c | 2 +- >> src/gallium/drivers/radeonsi/si_shader_nir.c | 2 +- >> src/gallium/drivers/vc4/vc4_program.c | 2 +- >> src/intel/compiler/brw_nir.c | 4 +-- >> src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +- >> 9 files changed, 47 insertions(+), 17 deletions(-) >> >> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c >> index 632512db09b..c8d502a9e3a 100644 >> --- a/src/amd/vulkan/radv_shader.c >> +++ b/src/amd/vulkan/radv_shader.c >> @@ -143,7 +143,7 @@ radv_optimize_nir(struct nir_shader *shader, bool >> optimize_conservatively) >> NIR_PASS(progress, shader, nir_opt_if); >> NIR_PASS(progress, shader, nir_opt_dead_cf); >> NIR_PASS(progress, shader, nir_opt_cse); >> - NIR_PASS(progress, shader, nir_opt_peephole_select, >> 8, true); >> + 
NIR_PASS(progress, shader, nir_opt_peephole_select, >> 8, true, true); >> NIR_PASS(progress, shader, nir_opt_algebraic); >> NIR_PASS(progress, shader, nir_opt_constant_folding); >> NIR_PASS(progress, shader, nir_opt_undef); >> diff --git a/src/broadcom/compiler/nir_to_vir.c >> b/src/broadcom/compiler/nir_to_vir.c >> index 0d23cea4d5b..ec0ff4b907a 100644 >> --- a/src/broadcom/compiler/nir_to_vir.c >> +++ b/src/broadcom/compiler/nir_to_vir.c >> @@ -1210,7 +1210,7 @@ v3d_optimize_nir(struct nir_shader *s) >> NIR_PASS(progress, s, nir_opt_dce); >> NIR_PASS(progress, s, nir_opt_dead_cf); >> NIR_PASS(progress, s, nir_opt_cse); >> - NIR_PASS(progress, s, nir_opt_peephole_select, 8, true); >> + NIR_PASS(progress, s, nir_opt_peephole_select, 8, >> true, true); >> NIR_PASS(progress, s, nir_opt_algebraic); >> NIR_PASS(progress, s, nir_opt_constant_folding); >> NIR_PASS(progress, s, nir_opt_undef); >> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h >> index 67fa46d5557..feb69be6b59 100644 >> --- a/src/compiler/nir/nir.h >> +++ b/src/compiler/nir/nir.h >> @@ -3003,7 +3003,7 @@ bool nir_opt_move_comparisons(nir_shader *shader); >> bool nir_opt_move_load_ubo(nir_shader *shader); >> bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, >> - bool indirect_load_ok); >> + bool indirect_load_ok, bool >> expensive_alu_ok); >> bool nir_opt_remove_phis_impl(nir_function_impl *impl); >> bool nir_opt_remove_phis(nir_shader *shader); >> diff --git a/src/compiler/nir/nir_opt_peephole_select.c >> b/src/compiler/nir/nir_opt_peephole_select.c >> index 6808d3eda6c..09b55f3739e 100644 >> --- a/src/compiler/nir/nir_opt_peephole_select.c >> +++ b/src/compiler/nir/nir_opt_peephole_select.c >> @@ -59,7 +59,8 @@ >> static bool >> block_check_for_allowed_instrs(nir_block *block, unsigned *count, >> - bool alu_ok, bool indirect_load_ok) >> + bool alu_ok, bool indirect_load_ok, >> + bool expensive_alu_ok) >> { >> nir_foreach_instr(instr, block) { >> switch (instr->type) { >> @@ 
-117,6 +118,25 @@ block_check_for_allowed_instrs(nir_block *block, >> unsigned *count, >> case nir_op_vec3: >> case nir_op_vec4: >> break; >> + >> + case nir_op_fcos: >> + case nir_op_fdiv: >> + case nir_op_fexp2: >> + case nir_op_flog2: >> + case nir_op_fmod: >> + case nir_op_fpow: >> + case nir_op_frcp: >> + case
Re: [Mesa-dev] [PATCH 3/9] nir/opt_peephole_select: Don't peephole_select expensive math instructions
Hi Ian, You might be interested in doing something like this [1]. It needs some tidy up (and likely further testing) but my initial results showed a fairly large number of select can be converted to ifs just based on having a ubo load. [1] https://gitlab.freedesktop.org/tarceri/mesa/commits/bcsel-to-if On 30/8/18 3:35 pm, Ian Romanick wrote: From: Ian Romanick On some GPUs, especially older Intel GPUs, some math instructions are very expensive. On those architectures, don't reduce flow control to a csel if one of the branches contains one of these expensive math instructions. This prevents a bunch of cycle count regressions on pre-Gen6 platforms with a later patch (intel/compiler: More peephole select for pre-Gen6). Signed-off-by: Ian Romanick --- src/amd/vulkan/radv_shader.c | 2 +- src/broadcom/compiler/nir_to_vir.c | 2 +- src/compiler/nir/nir.h | 2 +- src/compiler/nir/nir_opt_peephole_select.c | 46 +++- src/gallium/drivers/freedreno/ir3/ir3_nir.c | 2 +- src/gallium/drivers/radeonsi/si_shader_nir.c | 2 +- src/gallium/drivers/vc4/vc4_program.c| 2 +- src/intel/compiler/brw_nir.c | 4 +-- src/mesa/state_tracker/st_glsl_to_nir.cpp| 2 +- 9 files changed, 47 insertions(+), 17 deletions(-) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 632512db09b..c8d502a9e3a 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -143,7 +143,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively) NIR_PASS(progress, shader, nir_opt_if); NIR_PASS(progress, shader, nir_opt_dead_cf); NIR_PASS(progress, shader, nir_opt_cse); -NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true); +NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true); NIR_PASS(progress, shader, nir_opt_algebraic); NIR_PASS(progress, shader, nir_opt_constant_folding); NIR_PASS(progress, shader, nir_opt_undef); diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 0d23cea4d5b..ec0ff4b907a 
100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1210,7 +1210,7 @@ v3d_optimize_nir(struct nir_shader *s) NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_dead_cf); NIR_PASS(progress, s, nir_opt_cse); -NIR_PASS(progress, s, nir_opt_peephole_select, 8, true); +NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 67fa46d5557..feb69be6b59 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3003,7 +3003,7 @@ bool nir_opt_move_comparisons(nir_shader *shader); bool nir_opt_move_load_ubo(nir_shader *shader); bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, - bool indirect_load_ok); + bool indirect_load_ok, bool expensive_alu_ok); bool nir_opt_remove_phis_impl(nir_function_impl *impl); bool nir_opt_remove_phis(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c index 6808d3eda6c..09b55f3739e 100644 --- a/src/compiler/nir/nir_opt_peephole_select.c +++ b/src/compiler/nir/nir_opt_peephole_select.c @@ -59,7 +59,8 @@ static bool block_check_for_allowed_instrs(nir_block *block, unsigned *count, - bool alu_ok, bool indirect_load_ok) + bool alu_ok, bool indirect_load_ok, + bool expensive_alu_ok) { nir_foreach_instr(instr, block) { switch (instr->type) { @@ -117,6 +118,25 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count, case nir_op_vec3: case nir_op_vec4: break; + + case nir_op_fcos: + case nir_op_fdiv: + case nir_op_fexp2: + case nir_op_flog2: + case nir_op_fmod: + case nir_op_fpow: + case nir_op_frcp: + case nir_op_frem: + case nir_op_frsq: + case nir_op_fsin: + case nir_op_idiv: + case nir_op_irem: + case nir_op_udiv: +if (!alu_ok || !expensive_alu_ok) + return false; + +break; + default: if 
(!alu_ok) { /* It must be a move-like operation. */ @@ -160,7 +180,8 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count, static bool
Re: [Mesa-dev] [PATCH 3/9] nir/opt_peephole_select: Don't peephole_select expensive math instructions
Drive-by comment: At some point, maybe we just want to replace all these booleans with a function pointer that takes an instruction and returns a bool. That way the driver can define "expensive". Do with that what you will. On October 9, 2018 11:50:45 Ian Romanick wrote: On 10/08/2018 01:34 PM, Thomas Helland wrote: Den tor. 30. aug. 2018 kl. 07:37 skrev Ian Romanick : From: Ian Romanick On some GPUs, especially older Intel GPUs, some math instructions are very expensive. On those architectures, don't reduce flow control to a csel if one of the branches contains one of these expensive math instructions. This prevents a bunch of cycle count regressions on pre-Gen6 platforms with a later patch (intel/compiler: More peephole select for pre-Gen6). Signed-off-by: Ian Romanick --- src/amd/vulkan/radv_shader.c | 2 +- src/broadcom/compiler/nir_to_vir.c | 2 +- src/compiler/nir/nir.h | 2 +- src/compiler/nir/nir_opt_peephole_select.c | 46 +++- src/gallium/drivers/freedreno/ir3/ir3_nir.c | 2 +- src/gallium/drivers/radeonsi/si_shader_nir.c | 2 +- src/gallium/drivers/vc4/vc4_program.c| 2 +- src/intel/compiler/brw_nir.c | 4 +-- src/mesa/state_tracker/st_glsl_to_nir.cpp| 2 +- 9 files changed, 47 insertions(+), 17 deletions(-) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 632512db09b..c8d502a9e3a 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -143,7 +143,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively) NIR_PASS(progress, shader, nir_opt_if); NIR_PASS(progress, shader, nir_opt_dead_cf); NIR_PASS(progress, shader, nir_opt_cse); -NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true); +NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true); NIR_PASS(progress, shader, nir_opt_algebraic); NIR_PASS(progress, shader, nir_opt_constant_folding); NIR_PASS(progress, shader, nir_opt_undef); diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 
0d23cea4d5b..ec0ff4b907a 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1210,7 +1210,7 @@ v3d_optimize_nir(struct nir_shader *s) NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_dead_cf); NIR_PASS(progress, s, nir_opt_cse); -NIR_PASS(progress, s, nir_opt_peephole_select, 8, true); +NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 67fa46d5557..feb69be6b59 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3003,7 +3003,7 @@ bool nir_opt_move_comparisons(nir_shader *shader); bool nir_opt_move_load_ubo(nir_shader *shader); bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, - bool indirect_load_ok); + bool indirect_load_ok, bool expensive_alu_ok); bool nir_opt_remove_phis_impl(nir_function_impl *impl); bool nir_opt_remove_phis(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c index 6808d3eda6c..09b55f3739e 100644 --- a/src/compiler/nir/nir_opt_peephole_select.c +++ b/src/compiler/nir/nir_opt_peephole_select.c @@ -59,7 +59,8 @@ static bool block_check_for_allowed_instrs(nir_block *block, unsigned *count, - bool alu_ok, bool indirect_load_ok) + bool alu_ok, bool indirect_load_ok, + bool expensive_alu_ok) { nir_foreach_instr(instr, block) { switch (instr->type) { @@ -117,6 +118,25 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count, case nir_op_vec3: case nir_op_vec4: break; + + case nir_op_fcos: + case nir_op_fdiv: + case nir_op_fexp2: + case nir_op_flog2: + case nir_op_fmod: + case nir_op_fpow: + case nir_op_frcp: + case nir_op_frem: + case nir_op_frsq: + case nir_op_fsin: + case nir_op_idiv: + case nir_op_irem: + case nir_op_udiv: +if (!alu_ok || !expensive_alu_ok) + return false; + 
+break; + default: if (!alu_ok) { /* It must be a move-like operation. */ @@ -160,7 +180,8 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count, static bool
Re: [Mesa-dev] [PATCH 3/9] nir/opt_peephole_select: Don't peephole_select expensive math instructions
On 10/08/2018 01:34 PM, Thomas Helland wrote: > Den tor. 30. aug. 2018 kl. 07:37 skrev Ian Romanick : >> >> From: Ian Romanick >> >> On some GPUs, especially older Intel GPUs, some math instructions are >> very expensive. On those architectures, don't reduce flow control to a >> csel if one of the branches contains one of these expensive math >> instructions. >> >> This prevents a bunch of cycle count regressions on pre-Gen6 platforms >> with a later patch (intel/compiler: More peephole select for pre-Gen6). >> >> Signed-off-by: Ian Romanick >> --- >> src/amd/vulkan/radv_shader.c | 2 +- >> src/broadcom/compiler/nir_to_vir.c | 2 +- >> src/compiler/nir/nir.h | 2 +- >> src/compiler/nir/nir_opt_peephole_select.c | 46 >> +++- >> src/gallium/drivers/freedreno/ir3/ir3_nir.c | 2 +- >> src/gallium/drivers/radeonsi/si_shader_nir.c | 2 +- >> src/gallium/drivers/vc4/vc4_program.c| 2 +- >> src/intel/compiler/brw_nir.c | 4 +-- >> src/mesa/state_tracker/st_glsl_to_nir.cpp| 2 +- >> 9 files changed, 47 insertions(+), 17 deletions(-) >> >> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c >> index 632512db09b..c8d502a9e3a 100644 >> --- a/src/amd/vulkan/radv_shader.c >> +++ b/src/amd/vulkan/radv_shader.c >> @@ -143,7 +143,7 @@ radv_optimize_nir(struct nir_shader *shader, bool >> optimize_conservatively) >> NIR_PASS(progress, shader, nir_opt_if); >> NIR_PASS(progress, shader, nir_opt_dead_cf); >> NIR_PASS(progress, shader, nir_opt_cse); >> -NIR_PASS(progress, shader, nir_opt_peephole_select, 8, >> true); >> +NIR_PASS(progress, shader, nir_opt_peephole_select, 8, >> true, true); >> NIR_PASS(progress, shader, nir_opt_algebraic); >> NIR_PASS(progress, shader, nir_opt_constant_folding); >> NIR_PASS(progress, shader, nir_opt_undef); >> diff --git a/src/broadcom/compiler/nir_to_vir.c >> b/src/broadcom/compiler/nir_to_vir.c >> index 0d23cea4d5b..ec0ff4b907a 100644 >> --- a/src/broadcom/compiler/nir_to_vir.c >> +++ b/src/broadcom/compiler/nir_to_vir.c >> @@ -1210,7 
+1210,7 @@ v3d_optimize_nir(struct nir_shader *s) >> NIR_PASS(progress, s, nir_opt_dce); >> NIR_PASS(progress, s, nir_opt_dead_cf); >> NIR_PASS(progress, s, nir_opt_cse); >> -NIR_PASS(progress, s, nir_opt_peephole_select, 8, true); >> +NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, >> true); >> NIR_PASS(progress, s, nir_opt_algebraic); >> NIR_PASS(progress, s, nir_opt_constant_folding); >> NIR_PASS(progress, s, nir_opt_undef); >> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h >> index 67fa46d5557..feb69be6b59 100644 >> --- a/src/compiler/nir/nir.h >> +++ b/src/compiler/nir/nir.h >> @@ -3003,7 +3003,7 @@ bool nir_opt_move_comparisons(nir_shader *shader); >> bool nir_opt_move_load_ubo(nir_shader *shader); >> >> bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, >> - bool indirect_load_ok); >> + bool indirect_load_ok, bool expensive_alu_ok); >> >> bool nir_opt_remove_phis_impl(nir_function_impl *impl); >> bool nir_opt_remove_phis(nir_shader *shader); >> diff --git a/src/compiler/nir/nir_opt_peephole_select.c >> b/src/compiler/nir/nir_opt_peephole_select.c >> index 6808d3eda6c..09b55f3739e 100644 >> --- a/src/compiler/nir/nir_opt_peephole_select.c >> +++ b/src/compiler/nir/nir_opt_peephole_select.c >> @@ -59,7 +59,8 @@ >> >> static bool >> block_check_for_allowed_instrs(nir_block *block, unsigned *count, >> - bool alu_ok, bool indirect_load_ok) >> + bool alu_ok, bool indirect_load_ok, >> + bool expensive_alu_ok) >> { >> nir_foreach_instr(instr, block) { >>switch (instr->type) { >> @@ -117,6 +118,25 @@ block_check_for_allowed_instrs(nir_block *block, >> unsigned *count, >> case nir_op_vec3: >> case nir_op_vec4: >> break; >> + >> + case nir_op_fcos: >> + case nir_op_fdiv: >> + case nir_op_fexp2: >> + case nir_op_flog2: >> + case nir_op_fmod: >> + case nir_op_fpow: >> + case nir_op_frcp: >> + case nir_op_frem: >> + case nir_op_frsq: >> + case nir_op_fsin: >> + case nir_op_idiv: >> + case nir_op_irem: >> + case nir_op_udiv: >> +if 
(!alu_ok || !expensive_alu_ok) >> + return false; >> + >> +break; >> + >> default: >> if (!alu_ok) { >> /* It must be a move-like operation. */ >> @@ -160,7 +180,8 @@ block_check_for_allowed_instrs(nir_block *block, >> unsigned
Re: [Mesa-dev] [PATCH 3/9] nir/opt_peephole_select: Don't peephole_select expensive math instructions
Den tor. 30. aug. 2018 kl. 07:37 skrev Ian Romanick : > > From: Ian Romanick > > On some GPUs, especially older Intel GPUs, some math instructions are > very expensive. On those architectures, don't reduce flow control to a > csel if one of the branches contains one of these expensive math > instructions. > > This prevents a bunch of cycle count regressions on pre-Gen6 platforms > with a later patch (intel/compiler: More peephole select for pre-Gen6). > > Signed-off-by: Ian Romanick > --- > src/amd/vulkan/radv_shader.c | 2 +- > src/broadcom/compiler/nir_to_vir.c | 2 +- > src/compiler/nir/nir.h | 2 +- > src/compiler/nir/nir_opt_peephole_select.c | 46 > +++- > src/gallium/drivers/freedreno/ir3/ir3_nir.c | 2 +- > src/gallium/drivers/radeonsi/si_shader_nir.c | 2 +- > src/gallium/drivers/vc4/vc4_program.c| 2 +- > src/intel/compiler/brw_nir.c | 4 +-- > src/mesa/state_tracker/st_glsl_to_nir.cpp| 2 +- > 9 files changed, 47 insertions(+), 17 deletions(-) > > diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c > index 632512db09b..c8d502a9e3a 100644 > --- a/src/amd/vulkan/radv_shader.c > +++ b/src/amd/vulkan/radv_shader.c > @@ -143,7 +143,7 @@ radv_optimize_nir(struct nir_shader *shader, bool > optimize_conservatively) > NIR_PASS(progress, shader, nir_opt_if); > NIR_PASS(progress, shader, nir_opt_dead_cf); > NIR_PASS(progress, shader, nir_opt_cse); > -NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true); > +NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, > true); > NIR_PASS(progress, shader, nir_opt_algebraic); > NIR_PASS(progress, shader, nir_opt_constant_folding); > NIR_PASS(progress, shader, nir_opt_undef); > diff --git a/src/broadcom/compiler/nir_to_vir.c > b/src/broadcom/compiler/nir_to_vir.c > index 0d23cea4d5b..ec0ff4b907a 100644 > --- a/src/broadcom/compiler/nir_to_vir.c > +++ b/src/broadcom/compiler/nir_to_vir.c > @@ -1210,7 +1210,7 @@ v3d_optimize_nir(struct nir_shader *s) > NIR_PASS(progress, s, nir_opt_dce); > 
NIR_PASS(progress, s, nir_opt_dead_cf); > NIR_PASS(progress, s, nir_opt_cse); > -NIR_PASS(progress, s, nir_opt_peephole_select, 8, true); > +NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, > true); > NIR_PASS(progress, s, nir_opt_algebraic); > NIR_PASS(progress, s, nir_opt_constant_folding); > NIR_PASS(progress, s, nir_opt_undef); > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > index 67fa46d5557..feb69be6b59 100644 > --- a/src/compiler/nir/nir.h > +++ b/src/compiler/nir/nir.h > @@ -3003,7 +3003,7 @@ bool nir_opt_move_comparisons(nir_shader *shader); > bool nir_opt_move_load_ubo(nir_shader *shader); > > bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, > - bool indirect_load_ok); > + bool indirect_load_ok, bool expensive_alu_ok); > > bool nir_opt_remove_phis_impl(nir_function_impl *impl); > bool nir_opt_remove_phis(nir_shader *shader); > diff --git a/src/compiler/nir/nir_opt_peephole_select.c > b/src/compiler/nir/nir_opt_peephole_select.c > index 6808d3eda6c..09b55f3739e 100644 > --- a/src/compiler/nir/nir_opt_peephole_select.c > +++ b/src/compiler/nir/nir_opt_peephole_select.c > @@ -59,7 +59,8 @@ > > static bool > block_check_for_allowed_instrs(nir_block *block, unsigned *count, > - bool alu_ok, bool indirect_load_ok) > + bool alu_ok, bool indirect_load_ok, > + bool expensive_alu_ok) > { > nir_foreach_instr(instr, block) { >switch (instr->type) { > @@ -117,6 +118,25 @@ block_check_for_allowed_instrs(nir_block *block, > unsigned *count, > case nir_op_vec3: > case nir_op_vec4: > break; > + > + case nir_op_fcos: > + case nir_op_fdiv: > + case nir_op_fexp2: > + case nir_op_flog2: > + case nir_op_fmod: > + case nir_op_fpow: > + case nir_op_frcp: > + case nir_op_frem: > + case nir_op_frsq: > + case nir_op_fsin: > + case nir_op_idiv: > + case nir_op_irem: > + case nir_op_udiv: > +if (!alu_ok || !expensive_alu_ok) > + return false; > + > +break; > + > default: > if (!alu_ok) { > /* It must be a move-like operation. 
*/ > @@ -160,7 +180,8 @@ block_check_for_allowed_instrs(nir_block *block, unsigned > *count, > > static bool > nir_opt_peephole_select_block(nir_block *block, nir_shader *shader, > - unsigned limit, bool
[Mesa-dev] [PATCH 3/9] nir/opt_peephole_select: Don't peephole_select expensive math instructions
From: Ian Romanick On some GPUs, especially older Intel GPUs, some math instructions are very expensive. On those architectures, don't reduce flow control to a csel if one of the branches contains one of these expensive math instructions. This prevents a bunch of cycle count regressions on pre-Gen6 platforms with a later patch (intel/compiler: More peephole select for pre-Gen6). Signed-off-by: Ian Romanick --- src/amd/vulkan/radv_shader.c | 2 +- src/broadcom/compiler/nir_to_vir.c | 2 +- src/compiler/nir/nir.h | 2 +- src/compiler/nir/nir_opt_peephole_select.c | 46 +++- src/gallium/drivers/freedreno/ir3/ir3_nir.c | 2 +- src/gallium/drivers/radeonsi/si_shader_nir.c | 2 +- src/gallium/drivers/vc4/vc4_program.c| 2 +- src/intel/compiler/brw_nir.c | 4 +-- src/mesa/state_tracker/st_glsl_to_nir.cpp| 2 +- 9 files changed, 47 insertions(+), 17 deletions(-) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 632512db09b..c8d502a9e3a 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -143,7 +143,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively) NIR_PASS(progress, shader, nir_opt_if); NIR_PASS(progress, shader, nir_opt_dead_cf); NIR_PASS(progress, shader, nir_opt_cse); -NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true); +NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true); NIR_PASS(progress, shader, nir_opt_algebraic); NIR_PASS(progress, shader, nir_opt_constant_folding); NIR_PASS(progress, shader, nir_opt_undef); diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 0d23cea4d5b..ec0ff4b907a 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1210,7 +1210,7 @@ v3d_optimize_nir(struct nir_shader *s) NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_dead_cf); NIR_PASS(progress, s, nir_opt_cse); -NIR_PASS(progress, s, nir_opt_peephole_select, 8, true); +NIR_PASS(progress, s, 
nir_opt_peephole_select, 8, true, true); NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 67fa46d5557..feb69be6b59 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3003,7 +3003,7 @@ bool nir_opt_move_comparisons(nir_shader *shader); bool nir_opt_move_load_ubo(nir_shader *shader); bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, - bool indirect_load_ok); + bool indirect_load_ok, bool expensive_alu_ok); bool nir_opt_remove_phis_impl(nir_function_impl *impl); bool nir_opt_remove_phis(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c index 6808d3eda6c..09b55f3739e 100644 --- a/src/compiler/nir/nir_opt_peephole_select.c +++ b/src/compiler/nir/nir_opt_peephole_select.c @@ -59,7 +59,8 @@ static bool block_check_for_allowed_instrs(nir_block *block, unsigned *count, - bool alu_ok, bool indirect_load_ok) + bool alu_ok, bool indirect_load_ok, + bool expensive_alu_ok) { nir_foreach_instr(instr, block) { switch (instr->type) { @@ -117,6 +118,25 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count, case nir_op_vec3: case nir_op_vec4: break; + + case nir_op_fcos: + case nir_op_fdiv: + case nir_op_fexp2: + case nir_op_flog2: + case nir_op_fmod: + case nir_op_fpow: + case nir_op_frcp: + case nir_op_frem: + case nir_op_frsq: + case nir_op_fsin: + case nir_op_idiv: + case nir_op_irem: + case nir_op_udiv: +if (!alu_ok || !expensive_alu_ok) + return false; + +break; + default: if (!alu_ok) { /* It must be a move-like operation. 
*/ @@ -160,7 +180,8 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count, static bool nir_opt_peephole_select_block(nir_block *block, nir_shader *shader, - unsigned limit, bool indirect_load_ok) + unsigned limit, bool indirect_load_ok, + bool expensive_alu_ok) { if (nir_cf_node_is_first(&block->cf_node)) return false; @@ -180,10 +201,17 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader