Re: [Mesa-dev] [PATCH] radeonsi: eliminate trivial constant VS outputs
Hi Marek

Tested-by: Edmondo Tommasina

I tested the patch with Witcher 2.

Thanks
edmondo

On Tue, Oct 18, 2016 at 6:28 PM, Marek Olšák wrote:
> From: Marek Olšák
>
> These constant value VS PARAM exports:
> - 0,0,0,0
> - 0,0,0,1
> - 1,1,1,0
> - 1,1,1,1
> can be loaded into PS inputs using the DEFAULT_VAL field, and the VS exports
> can be removed from the IR to save export & parameter memory.
>
> After LLVM optimizations, analyze the IR to see which exports are equal to
> the ones listed above (or undef) and remove them if they are.
>
> Targeted use cases:
> - All DX9 eON ports always clear 10 VS outputs to 0.0 even if most of them
>   are unused by PS (such as Witcher 2 below).
> - VS output arrays with unused elements that the GLSL compiler can't
>   eliminate (such as Batman below).
>
> The shader-db deltas are quite interesting:
> (not from upstream si-report.py, it won't be upstreamed)
>
> PERCENTAGE DELTAS       Shaders     PARAM exports (affected only)
> batman_arkham_origins       589     -67.17 %
> bioshock-infinite          1769      -0.47 %
> dirt-showdown               548      -2.68 %
> dota2                      1747      -3.36 %
> f1-2015                     776      -4.94 %
> left_4_dead_2              1762      -0.07 %
> metro_2033_redux           2670      -0.43 %
> portal                      474      -0.22 %
> talos_principle             324      -3.63 %
> warsow                      176      -2.20 %
> witcher2                   1040     -73.78 %
>
> All affected                991     -65.37 %  ...  9681 -> 3353
>
> Total                     26725     -10.82 %  ...
58490 -> 52162 > --- > src/gallium/drivers/radeonsi/si_shader.c| 154 > > src/gallium/drivers/radeonsi/si_shader.h| 11 ++ > src/gallium/drivers/radeonsi/si_state_shaders.c | 17 ++- > 3 files changed, 180 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index a361418..7fc1df4 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -6593,20 +6593,167 @@ static void si_init_shader_ctx(struct > si_shader_context *ctx, > bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex; > bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = > si_llvm_emit_primitive; > bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier; > > bld_base->op_actions[TGSI_OPCODE_MAX].emit = > build_tgsi_intrinsic_nomem; > bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32"; > bld_base->op_actions[TGSI_OPCODE_MIN].emit = > build_tgsi_intrinsic_nomem; > bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32"; > } > > +/* Return true if the PARAM export has been eliminated. */ > +static bool si_eliminate_const_output(struct si_shader_context *ctx, > + LLVMValueRef inst, unsigned offset) > +{ > + struct si_shader *shader = ctx->shader; > + unsigned num_outputs = shader->selector->info.num_outputs; > + double v[4]; > + unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ > + > + for (i = 0; i < 4; i++) { > + LLVMBool loses_info; > + LLVMValueRef p = LLVMGetOperand(inst, 5 + i); > + if (!LLVMIsConstant(p)) > + return false; > + > + /* It's a constant expression. Undef outputs are eliminated > too. */ > + if (LLVMIsUndef(p)) > + v[i] = 0; > + else > + v[i] = LLVMConstRealGetDouble(p, &loses_info); > + > + if (v[i] != 0 && v[i] != 1) > + return false; > + } > + > + /* Only certain combinations of 0 and 1 can be eliminated. */ > + if (v[0] == 0 && v[1] == 0 && v[2] == 0) > + default_val = v[3] == 0 ? 
0 : 1; > + else if (v[0] == 1 && v[1] == 1 && v[2] == 1) > + default_val = v[3] == 0 ? 2 : 3; > + else > + return false; > + > + /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ > + LLVMInstructionEraseFromParent(inst); > + > + /* Change OFFSET to DEFAULT_VAL. */ > + for (i = 0; i < num_outputs; i++) { > + if (shader->info.vs_output_param_offset[i] == offset) { > + shader->info.vs_output_param_offset[i] = > + EXP_PARAM_DEFAULT_VAL_ + default_val; > + break; > + } > + } > + return true; > +} > + > +struct si_vs_exports { > + unsigned num; > + unsigned offset[SI_MAX_VS_OUTPUTS]; > + LLVMValueRef inst[SI_MAX_VS_OUTPUTS]; > +}; > + > +static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx) > +{ > + struct si_shader *shader = ctx->shader; > + struct t
Re: [Mesa-dev] [PATCH] radeonsi: eliminate trivial constant VS outputs
On 18.10.2016 18:28, Marek Olšák wrote:
From: Marek Olšák

These constant value VS PARAM exports:
- 0,0,0,0
- 0,0,0,1
- 1,1,1,0
- 1,1,1,1
can be loaded into PS inputs using the DEFAULT_VAL field, and the VS exports
can be removed from the IR to save export & parameter memory.

After LLVM optimizations, analyze the IR to see which exports are equal to
the ones listed above (or undef) and remove them if they are.

Targeted use cases:
- All DX9 eON ports always clear 10 VS outputs to 0.0 even if most of them
  are unused by PS (such as Witcher 2 below).
- VS output arrays with unused elements that the GLSL compiler can't
  eliminate (such as Batman below).

The shader-db deltas are quite interesting:
(not from upstream si-report.py, it won't be upstreamed)

PERCENTAGE DELTAS       Shaders     PARAM exports (affected only)
batman_arkham_origins       589     -67.17 %
bioshock-infinite          1769      -0.47 %
dirt-showdown               548      -2.68 %
dota2                      1747      -3.36 %
f1-2015                     776      -4.94 %
left_4_dead_2              1762      -0.07 %
metro_2033_redux           2670      -0.43 %
portal                      474      -0.22 %
talos_principle             324      -3.63 %
warsow                      176      -2.20 %
witcher2                   1040     -73.78 %

All affected                991     -65.37 %  ...  9681 -> 3353

Total                     26725     -10.82 %  ...  58490 -> 52162

That's a really cool result!
--- src/gallium/drivers/radeonsi/si_shader.c| 154 src/gallium/drivers/radeonsi/si_shader.h| 11 ++ src/gallium/drivers/radeonsi/si_state_shaders.c | 17 ++- 3 files changed, 180 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index a361418..7fc1df4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -6593,20 +6593,167 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex; bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive; bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier; bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32"; bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32"; } +/* Return true if the PARAM export has been eliminated. */ +static bool si_eliminate_const_output(struct si_shader_context *ctx, + LLVMValueRef inst, unsigned offset) +{ + struct si_shader *shader = ctx->shader; + unsigned num_outputs = shader->selector->info.num_outputs; + double v[4]; + unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ + + for (i = 0; i < 4; i++) { + LLVMBool loses_info; + LLVMValueRef p = LLVMGetOperand(inst, 5 + i); + if (!LLVMIsConstant(p)) + return false; + + /* It's a constant expression. Undef outputs are eliminated too. */ + if (LLVMIsUndef(p)) + v[i] = 0; + else + v[i] = LLVMConstRealGetDouble(p, &loses_info); + + if (v[i] != 0 && v[i] != 1) + return false; + } + + /* Only certain combinations of 0 and 1 can be eliminated. */ + if (v[0] == 0 && v[1] == 0 && v[2] == 0) + default_val = v[3] == 0 ? 0 : 1; + else if (v[0] == 1 && v[1] == 1 && v[2] == 1) + default_val = v[3] == 0 ? 
2 : 3; You could slightly improve the handling of undef here by filling two arrays is_zero[4] and is_one[4] in the loop instead of v[4]. Undef would set both is_zero and is_one to true. It probably doesn't matter much in practice, so up to you. Reviewed-by: Nicolai Hähnle + else + return false; + + /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ + LLVMInstructionEraseFromParent(inst); + + /* Change OFFSET to DEFAULT_VAL. */ + for (i = 0; i < num_outputs; i++) { + if (shader->info.vs_output_param_offset[i] == offset) { + shader->info.vs_output_param_offset[i] = + EXP_PARAM_DEFAULT_VAL_ + default_val; + break; + } + } + return true; +} + +struct si_vs_exports { + unsigned num; + unsigned offset[SI_MAX_VS_OUTPUTS]; + LLVMValueRef inst[SI_MAX_VS_OUTPUTS]; +}; + +static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx) +{ + struct si_shader *shader = ctx->shader; + struct tgsi_shader_info *info = &shader->sele
[Mesa-dev] [PATCH] radeonsi: eliminate trivial constant VS outputs
From: Marek Olšák

These constant value VS PARAM exports:
- 0,0,0,0
- 0,0,0,1
- 1,1,1,0
- 1,1,1,1
can be loaded into PS inputs using the DEFAULT_VAL field, and the VS exports
can be removed from the IR to save export & parameter memory.

After LLVM optimizations, analyze the IR to see which exports are equal to
the ones listed above (or undef) and remove them if they are.

Targeted use cases:
- All DX9 eON ports always clear 10 VS outputs to 0.0 even if most of them
  are unused by PS (such as Witcher 2 below).
- VS output arrays with unused elements that the GLSL compiler can't
  eliminate (such as Batman below).

The shader-db deltas are quite interesting:
(not from upstream si-report.py, it won't be upstreamed)

PERCENTAGE DELTAS       Shaders     PARAM exports (affected only)
batman_arkham_origins       589     -67.17 %
bioshock-infinite          1769      -0.47 %
dirt-showdown               548      -2.68 %
dota2                      1747      -3.36 %
f1-2015                     776      -4.94 %
left_4_dead_2              1762      -0.07 %
metro_2033_redux           2670      -0.43 %
portal                      474      -0.22 %
talos_principle             324      -3.63 %
warsow                      176      -2.20 %
witcher2                   1040     -73.78 %

All affected                991     -65.37 %  ...  9681 -> 3353

Total                     26725     -10.82 %  ...
58490 -> 52162 --- src/gallium/drivers/radeonsi/si_shader.c| 154 src/gallium/drivers/radeonsi/si_shader.h| 11 ++ src/gallium/drivers/radeonsi/si_state_shaders.c | 17 ++- 3 files changed, 180 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index a361418..7fc1df4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -6593,20 +6593,167 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex; bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive; bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier; bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32"; bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32"; } +/* Return true if the PARAM export has been eliminated. */ +static bool si_eliminate_const_output(struct si_shader_context *ctx, + LLVMValueRef inst, unsigned offset) +{ + struct si_shader *shader = ctx->shader; + unsigned num_outputs = shader->selector->info.num_outputs; + double v[4]; + unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ + + for (i = 0; i < 4; i++) { + LLVMBool loses_info; + LLVMValueRef p = LLVMGetOperand(inst, 5 + i); + if (!LLVMIsConstant(p)) + return false; + + /* It's a constant expression. Undef outputs are eliminated too. */ + if (LLVMIsUndef(p)) + v[i] = 0; + else + v[i] = LLVMConstRealGetDouble(p, &loses_info); + + if (v[i] != 0 && v[i] != 1) + return false; + } + + /* Only certain combinations of 0 and 1 can be eliminated. */ + if (v[0] == 0 && v[1] == 0 && v[2] == 0) + default_val = v[3] == 0 ? 0 : 1; + else if (v[0] == 1 && v[1] == 1 && v[2] == 1) + default_val = v[3] == 0 ? 
2 : 3; + else + return false; + + /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ + LLVMInstructionEraseFromParent(inst); + + /* Change OFFSET to DEFAULT_VAL. */ + for (i = 0; i < num_outputs; i++) { + if (shader->info.vs_output_param_offset[i] == offset) { + shader->info.vs_output_param_offset[i] = + EXP_PARAM_DEFAULT_VAL_ + default_val; + break; + } + } + return true; +} + +struct si_vs_exports { + unsigned num; + unsigned offset[SI_MAX_VS_OUTPUTS]; + LLVMValueRef inst[SI_MAX_VS_OUTPUTS]; +}; + +static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx) +{ + struct si_shader *shader = ctx->shader; + struct tgsi_shader_info *info = &shader->selector->info; + LLVMBasicBlockRef bb; + struct si_vs_exports exports; + bool removed_any = false; + + exports.num = 0; + + if ((ctx->type == PIPE_SHADER_VERTEX && +(shader->key.vs.as_es || shader->key.vs.as_ls)) || + (ctx->type == PIPE_SHADER_TESS_EVAL && shader->key.tes.as_es)) + retu