Re: [Mesa-dev] [PATCH] radeonsi: eliminate trivial constant VS outputs

2016-10-19 Thread Edmondo Tommasina
Hi Marek

Tested-by: Edmondo Tommasina 

I tested the patch with Witcher 2.

Thanks
edmondo


On Tue, Oct 18, 2016 at 6:28 PM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> These constant value VS PARAM exports:
> - 0,0,0,0
> - 0,0,0,1
> - 1,1,1,0
> - 1,1,1,1
> can be loaded into PS inputs using the DEFAULT_VAL field, and the VS exports
> can be removed from the IR to save export & parameter memory.
>
> After LLVM optimizations, analyze the IR to see which exports are equal to
> the ones listed above (or undef) and remove them if they are.
>
> Targeted use cases:
> - All DX9 eON ports always clear 10 VS outputs to 0.0 even if most of them
>   are unused by PS (such as Witcher 2 below).
> - VS output arrays with unused elements that the GLSL compiler can't
>   eliminate (such as Batman below).
>
> The shader-db deltas are quite interesting:
> (not from upstream si-report.py, it won't be upstreamed)
>
> PERCENTAGE DELTAS     Shaders  PARAM exports (affected only)
> batman_arkham_origins     589  -67.17 %
> bioshock-infinite        1769   -0.47 %
> dirt-showdown             548   -2.68 %
> dota2                    1747   -3.36 %
> f1-2015                   776   -4.94 %
> left_4_dead_2            1762   -0.07 %
> metro_2033_redux         2670   -0.43 %
> portal                    474   -0.22 %
> talos_principle           324   -3.63 %
> warsow                    176   -2.20 %
> witcher2                 1040  -73.78 %
> 
> All affected 991  -65.37 %  ... 9681 -> 3353
> 
> Total  26725  -10.82 %  ... 58490 -> 52162
> ---
>  src/gallium/drivers/radeonsi/si_shader.c| 154 
> 
>  src/gallium/drivers/radeonsi/si_shader.h|  11 ++
>  src/gallium/drivers/radeonsi/si_state_shaders.c |  17 ++-
>  3 files changed, 180 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index a361418..7fc1df4 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -6593,20 +6593,167 @@ static void si_init_shader_ctx(struct 
> si_shader_context *ctx,
> bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
> bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = 
> si_llvm_emit_primitive;
> bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
>
> bld_base->op_actions[TGSI_OPCODE_MAX].emit = 
> build_tgsi_intrinsic_nomem;
> bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
> bld_base->op_actions[TGSI_OPCODE_MIN].emit = 
> build_tgsi_intrinsic_nomem;
> bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
>  }
>
> +/* Return true if the PARAM export has been eliminated. */
> +static bool si_eliminate_const_output(struct si_shader_context *ctx,
> + LLVMValueRef inst, unsigned offset)
> +{
> +   struct si_shader *shader = ctx->shader;
> +   unsigned num_outputs = shader->selector->info.num_outputs;
> +   double v[4];
> +   unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
> +
> +   for (i = 0; i < 4; i++) {
> +   LLVMBool loses_info;
> +   LLVMValueRef p = LLVMGetOperand(inst, 5 + i);
> +   if (!LLVMIsConstant(p))
> +   return false;
> +
> +   /* It's a constant expression. Undef outputs are eliminated 
> too. */
> +   if (LLVMIsUndef(p))
> +   v[i] = 0;
> +   else
> +   v[i] = LLVMConstRealGetDouble(p, &loses_info);
> +
> +   if (v[i] != 0 && v[i] != 1)
> +   return false;
> +   }
> +
> +   /* Only certain combinations of 0 and 1 can be eliminated. */
> +   if (v[0] == 0 && v[1] == 0 && v[2] == 0)
> +   default_val = v[3] == 0 ? 0 : 1;
> +   else if (v[0] == 1 && v[1] == 1 && v[2] == 1)
> +   default_val = v[3] == 0 ? 2 : 3;
> +   else
> +   return false;
> +
> +   /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
> +   LLVMInstructionEraseFromParent(inst);
> +
> +   /* Change OFFSET to DEFAULT_VAL. */
> +   for (i = 0; i < num_outputs; i++) {
> +   if (shader->info.vs_output_param_offset[i] == offset) {
> +   shader->info.vs_output_param_offset[i] =
> +   EXP_PARAM_DEFAULT_VAL_ + default_val;
> +   break;
> +   }
> +   }
> +   return true;
> +}
> +
> +struct si_vs_exports {
> +   unsigned num;
> +   unsigned offset[SI_MAX_VS_OUTPUTS];
> +   LLVMValueRef inst[SI_MAX_VS_OUTPUTS];
> +};
> +
> +static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
> +{
> +   struct si_shader *shader = ctx->shader;
> +   struct t

Re: [Mesa-dev] [PATCH] radeonsi: eliminate trivial constant VS outputs

2016-10-19 Thread Nicolai Hähnle

On 18.10.2016 18:28, Marek Olšák wrote:

From: Marek Olšák 

These constant value VS PARAM exports:
- 0,0,0,0
- 0,0,0,1
- 1,1,1,0
- 1,1,1,1
can be loaded into PS inputs using the DEFAULT_VAL field, and the VS exports
can be removed from the IR to save export & parameter memory.

After LLVM optimizations, analyze the IR to see which exports are equal to
the ones listed above (or undef) and remove them if they are.

Targeted use cases:
- All DX9 eON ports always clear 10 VS outputs to 0.0 even if most of them
  are unused by PS (such as Witcher 2 below).
- VS output arrays with unused elements that the GLSL compiler can't
  eliminate (such as Batman below).

The shader-db deltas are quite interesting:
(not from upstream si-report.py, it won't be upstreamed)

PERCENTAGE DELTAS     Shaders  PARAM exports (affected only)
batman_arkham_origins     589  -67.17 %
bioshock-infinite        1769   -0.47 %
dirt-showdown             548   -2.68 %
dota2                    1747   -3.36 %
f1-2015                   776   -4.94 %
left_4_dead_2            1762   -0.07 %
metro_2033_redux         2670   -0.43 %
portal                    474   -0.22 %
talos_principle           324   -3.63 %
warsow                    176   -2.20 %
witcher2                 1040  -73.78 %

All affected 991  -65.37 %  ... 9681 -> 3353

Total  26725  -10.82 %  ... 58490 -> 52162


That's a really cool result!



---
 src/gallium/drivers/radeonsi/si_shader.c| 154 
 src/gallium/drivers/radeonsi/si_shader.h|  11 ++
 src/gallium/drivers/radeonsi/si_state_shaders.c |  17 ++-
 3 files changed, 180 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index a361418..7fc1df4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6593,20 +6593,167 @@ static void si_init_shader_ctx(struct 
si_shader_context *ctx,
bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;

bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
 }

+/* Return true if the PARAM export has been eliminated. */
+static bool si_eliminate_const_output(struct si_shader_context *ctx,
+ LLVMValueRef inst, unsigned offset)
+{
+   struct si_shader *shader = ctx->shader;
+   unsigned num_outputs = shader->selector->info.num_outputs;
+   double v[4];
+   unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
+
+   for (i = 0; i < 4; i++) {
+   LLVMBool loses_info;
+   LLVMValueRef p = LLVMGetOperand(inst, 5 + i);
+   if (!LLVMIsConstant(p))
+   return false;
+
+   /* It's a constant expression. Undef outputs are eliminated 
too. */
+   if (LLVMIsUndef(p))
+   v[i] = 0;
+   else
+   v[i] = LLVMConstRealGetDouble(p, &loses_info);
+
+   if (v[i] != 0 && v[i] != 1)
+   return false;
+   }
+
+   /* Only certain combinations of 0 and 1 can be eliminated. */
+   if (v[0] == 0 && v[1] == 0 && v[2] == 0)
+   default_val = v[3] == 0 ? 0 : 1;
+   else if (v[0] == 1 && v[1] == 1 && v[2] == 1)
+   default_val = v[3] == 0 ? 2 : 3;


You could slightly improve the handling of undef here by filling two 
arrays is_zero[4] and is_one[4] in the loop instead of v[4]. Undef would 
set both is_zero and is_one to true. It probably doesn't matter much in 
practice, so up to you.


Reviewed-by: Nicolai Hähnle 


+   else
+   return false;
+
+   /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
+   LLVMInstructionEraseFromParent(inst);
+
+   /* Change OFFSET to DEFAULT_VAL. */
+   for (i = 0; i < num_outputs; i++) {
+   if (shader->info.vs_output_param_offset[i] == offset) {
+   shader->info.vs_output_param_offset[i] =
+   EXP_PARAM_DEFAULT_VAL_ + default_val;
+   break;
+   }
+   }
+   return true;
+}
+
+struct si_vs_exports {
+   unsigned num;
+   unsigned offset[SI_MAX_VS_OUTPUTS];
+   LLVMValueRef inst[SI_MAX_VS_OUTPUTS];
+};
+
+static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
+{
+   struct si_shader *shader = ctx->shader;
+   struct tgsi_shader_info *info = &shader->sele

[Mesa-dev] [PATCH] radeonsi: eliminate trivial constant VS outputs

2016-10-18 Thread Marek Olšák
From: Marek Olšák 

These constant value VS PARAM exports:
- 0,0,0,0
- 0,0,0,1
- 1,1,1,0
- 1,1,1,1
can be loaded into PS inputs using the DEFAULT_VAL field, and the VS exports
can be removed from the IR to save export & parameter memory.

After LLVM optimizations, analyze the IR to see which exports are equal to
the ones listed above (or undef) and remove them if they are.

Targeted use cases:
- All DX9 eON ports always clear 10 VS outputs to 0.0 even if most of them
  are unused by PS (such as Witcher 2 below).
- VS output arrays with unused elements that the GLSL compiler can't
  eliminate (such as Batman below).

The shader-db deltas are quite interesting:
(not from upstream si-report.py, it won't be upstreamed)

PERCENTAGE DELTAS     Shaders  PARAM exports (affected only)
batman_arkham_origins     589  -67.17 %
bioshock-infinite        1769   -0.47 %
dirt-showdown             548   -2.68 %
dota2                    1747   -3.36 %
f1-2015                   776   -4.94 %
left_4_dead_2            1762   -0.07 %
metro_2033_redux         2670   -0.43 %
portal                    474   -0.22 %
talos_principle           324   -3.63 %
warsow                    176   -2.20 %
witcher2                 1040  -73.78 %

All affected 991  -65.37 %  ... 9681 -> 3353

Total  26725  -10.82 %  ... 58490 -> 52162
---
 src/gallium/drivers/radeonsi/si_shader.c| 154 
 src/gallium/drivers/radeonsi/si_shader.h|  11 ++
 src/gallium/drivers/radeonsi/si_state_shaders.c |  17 ++-
 3 files changed, 180 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index a361418..7fc1df4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6593,20 +6593,167 @@ static void si_init_shader_ctx(struct 
si_shader_context *ctx,
bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
 
bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
 }
 
+/* Return true if the PARAM export has been eliminated. */
+static bool si_eliminate_const_output(struct si_shader_context *ctx,
+ LLVMValueRef inst, unsigned offset)
+{
+   struct si_shader *shader = ctx->shader;
+   unsigned num_outputs = shader->selector->info.num_outputs;
+   double v[4];
+   unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
+
+   for (i = 0; i < 4; i++) {
+   LLVMBool loses_info;
+   LLVMValueRef p = LLVMGetOperand(inst, 5 + i);
+   if (!LLVMIsConstant(p))
+   return false;
+
+   /* It's a constant expression. Undef outputs are eliminated 
too. */
+   if (LLVMIsUndef(p))
+   v[i] = 0;
+   else
+   v[i] = LLVMConstRealGetDouble(p, &loses_info);
+
+   if (v[i] != 0 && v[i] != 1)
+   return false;
+   }
+
+   /* Only certain combinations of 0 and 1 can be eliminated. */
+   if (v[0] == 0 && v[1] == 0 && v[2] == 0)
+   default_val = v[3] == 0 ? 0 : 1;
+   else if (v[0] == 1 && v[1] == 1 && v[2] == 1)
+   default_val = v[3] == 0 ? 2 : 3;
+   else
+   return false;
+
+   /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
+   LLVMInstructionEraseFromParent(inst);
+
+   /* Change OFFSET to DEFAULT_VAL. */
+   for (i = 0; i < num_outputs; i++) {
+   if (shader->info.vs_output_param_offset[i] == offset) {
+   shader->info.vs_output_param_offset[i] =
+   EXP_PARAM_DEFAULT_VAL_ + default_val;
+   break;
+   }
+   }
+   return true;
+}
+
+struct si_vs_exports {
+   unsigned num;
+   unsigned offset[SI_MAX_VS_OUTPUTS];
+   LLVMValueRef inst[SI_MAX_VS_OUTPUTS];
+};
+
+static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
+{
+   struct si_shader *shader = ctx->shader;
+   struct tgsi_shader_info *info = &shader->selector->info;
+   LLVMBasicBlockRef bb;
+   struct si_vs_exports exports;
+   bool removed_any = false;
+
+   exports.num = 0;
+
+   if ((ctx->type == PIPE_SHADER_VERTEX &&
+(shader->key.vs.as_es || shader->key.vs.as_ls)) ||
+   (ctx->type == PIPE_SHADER_TESS_EVAL && shader->key.tes.as_es))
+   retu