Re: [Mesa-dev] [PATCH 19/22] i965/fs: Merge CMP and SEL into CSEL on Gen8+

2018-02-28 Thread Samuel Iglesias Gonsálvez

Reviewed-by: Samuel Iglesias Gonsálvez 


On 24/02/18 00:56, Ian Romanick wrote:
> From: Ian Romanick 
>
> Skylake
> total instructions in shared programs: 14514547 -> 14503025 (-0.08%)
> instructions in affected programs: 2008312 -> 1996790 (-0.57%)
> helped: 5816
> HURT: 0
> helped stats (abs) min: 1 max: 27 x̄: 1.98 x̃: 1
> helped stats (rel) min: 0.03% max: 6.34% x̄: 0.90% x̃: 0.69%
> 95% mean confidence interval for instructions value: -2.04 -1.92
> 95% mean confidence interval for instructions %-change: -0.92% -0.88%
> Instructions are helped.
>
> total cycles in shared programs: 533136780 -> 532150352 (-0.19%)
> cycles in affected programs: 347942196 -> 346955768 (-0.28%)
> helped: 4796
> HURT: 767
> helped stats (abs) min: 1 max: 32000 x̄: 223.04 x̃: 10
> helped stats (rel) min: <.01% max: 24.02% x̄: 2.72% x̃: 0.74%
> HURT stats (abs)   min: 1 max: 5252 x̄: 108.59 x̃: 21
> HURT stats (rel)   min: 0.01% max: 89.43% x̄: 2.50% x̃: 0.55%
> 95% mean confidence interval for cycles value: -233.64 -120.99
> 95% mean confidence interval for cycles %-change: -2.13% -1.87%
> Cycles are helped.
>
> LOST:   0
> GAINED: 1
>
> Broadwell
> total instructions in shared programs: 14808028 -> 14796582 (-0.08%)
> instructions in affected programs: 2341840 -> 2330394 (-0.49%)
> helped: 5802
> HURT: 0
> helped stats (abs) min: 1 max: 27 x̄: 1.97 x̃: 1
> helped stats (rel) min: 0.03% max: 6.25% x̄: 0.90% x̃: 0.69%
> 95% mean confidence interval for instructions value: -2.03 -1.92
> 95% mean confidence interval for instructions %-change: -0.92% -0.88%
> Instructions are helped.
>
> total cycles in shared programs: 559431004 -> 558393842 (-0.19%)
> cycles in affected programs: 368801897 -> 367764735 (-0.28%)
> helped: 4695
> HURT: 828
> helped stats (abs) min: 1 max: 32000 x̄: 230.83 x̃: 10
> helped stats (rel) min: <.01% max: 23.60% x̄: 2.61% x̃: 0.62%
> HURT stats (abs)   min: 1 max: 2538 x̄: 56.25 x̃: 14
> HURT stats (rel)   min: <.01% max: 96.72% x̄: 2.88% x̃: 0.85%
> 95% mean confidence interval for cycles value: -244.48 -131.10
> 95% mean confidence interval for cycles %-change: -1.92% -1.65%
> Cycles are helped.
>
> No changes on earlier platforms.
>
> Signed-off-by: Ian Romanick 
> ---
>  src/intel/compiler/brw_fs.cpp | 85 +++
>  src/intel/compiler/brw_fs.h   |  1 +
>  2 files changed, 86 insertions(+)
>
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index accae1b..1192436 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -2813,6 +2813,85 @@ mask_relative_to(const fs_reg &r, const fs_reg &s, unsigned ds)
>  }
>  
>  bool
> +fs_visitor::opt_peephole_csel()
> +{
> +   if (devinfo->gen < 8)
> +  return false;
> +
> +   bool progress = false;
> +
> +   foreach_block_reverse(block, cfg) {
> +  int ip = block->end_ip + 1;
> +
> +  foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
> + ip--;
> +
> + if (inst->opcode != BRW_OPCODE_SEL ||
> + inst->predicate == BRW_PREDICATE_NONE ||
> + (inst->dst.type != BRW_REGISTER_TYPE_F &&
> +  inst->dst.type != BRW_REGISTER_TYPE_D &&
> +  inst->dst.type != BRW_REGISTER_TYPE_UD))
> +continue;
> +
> + /* Because it is a 3-src instruction, CSEL cannot have any immediate
> +  * values as sources.
> +  */
> + if (inst->src[0].file != VGRF ||
> + (inst->src[1].file != VGRF && !inst->src[1].is_zero()))
> +continue;
> +
> + foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, 
> inst) {
> +if (!scan_inst->flags_written())
> +   continue;
> +
> +if ((scan_inst->opcode != BRW_OPCODE_CMP &&
> + scan_inst->opcode != BRW_OPCODE_MOV) ||
> +scan_inst->predicate != BRW_PREDICATE_NONE ||
> +scan_inst->src[0].file != VGRF ||
> +scan_inst->src[0].type != BRW_REGISTER_TYPE_F)
> +   break;
> +
> +if (scan_inst->opcode == BRW_OPCODE_CMP && 
> !scan_inst->src[1].is_zero())
> +   break;
> +
> +const brw::fs_builder ibld(this, block, inst);
> +
> +const enum brw_conditional_mod cond =
> +   inst->predicate == BRW_PREDICATE_NORMAL
> +   ? scan_inst->conditional_mod
> +   : brw_swap_cmod(scan_inst->conditional_mod);
> +
> +fs_inst *csel_inst = NULL;
> +
> +if (inst->src[1].file == VGRF) {
> +   csel_inst = ibld.CSEL(inst->dst,
> + inst->src[0],
> + inst->src[1],
> + scan_inst->src[0],
> + cond);
> +} else if (cond == BRW_CONDITIONAL_NZ) {
> +   csel_inst = 

[Mesa-dev] [PATCH 19/22] i965/fs: Merge CMP and SEL into CSEL on Gen8+

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

Skylake
total instructions in shared programs: 14514547 -> 14503025 (-0.08%)
instructions in affected programs: 2008312 -> 1996790 (-0.57%)
helped: 5816
HURT: 0
helped stats (abs) min: 1 max: 27 x̄: 1.98 x̃: 1
helped stats (rel) min: 0.03% max: 6.34% x̄: 0.90% x̃: 0.69%
95% mean confidence interval for instructions value: -2.04 -1.92
95% mean confidence interval for instructions %-change: -0.92% -0.88%
Instructions are helped.

total cycles in shared programs: 533136780 -> 532150352 (-0.19%)
cycles in affected programs: 347942196 -> 346955768 (-0.28%)
helped: 4796
HURT: 767
helped stats (abs) min: 1 max: 32000 x̄: 223.04 x̃: 10
helped stats (rel) min: <.01% max: 24.02% x̄: 2.72% x̃: 0.74%
HURT stats (abs)   min: 1 max: 5252 x̄: 108.59 x̃: 21
HURT stats (rel)   min: 0.01% max: 89.43% x̄: 2.50% x̃: 0.55%
95% mean confidence interval for cycles value: -233.64 -120.99
95% mean confidence interval for cycles %-change: -2.13% -1.87%
Cycles are helped.

LOST:   0
GAINED: 1

Broadwell
total instructions in shared programs: 14808028 -> 14796582 (-0.08%)
instructions in affected programs: 2341840 -> 2330394 (-0.49%)
helped: 5802
HURT: 0
helped stats (abs) min: 1 max: 27 x̄: 1.97 x̃: 1
helped stats (rel) min: 0.03% max: 6.25% x̄: 0.90% x̃: 0.69%
95% mean confidence interval for instructions value: -2.03 -1.92
95% mean confidence interval for instructions %-change: -0.92% -0.88%
Instructions are helped.

total cycles in shared programs: 559431004 -> 558393842 (-0.19%)
cycles in affected programs: 368801897 -> 367764735 (-0.28%)
helped: 4695
HURT: 828
helped stats (abs) min: 1 max: 32000 x̄: 230.83 x̃: 10
helped stats (rel) min: <.01% max: 23.60% x̄: 2.61% x̃: 0.62%
HURT stats (abs)   min: 1 max: 2538 x̄: 56.25 x̃: 14
HURT stats (rel)   min: <.01% max: 96.72% x̄: 2.88% x̃: 0.85%
95% mean confidence interval for cycles value: -244.48 -131.10
95% mean confidence interval for cycles %-change: -1.92% -1.65%
Cycles are helped.

No changes on earlier platforms.

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_fs.cpp | 85 +++
 src/intel/compiler/brw_fs.h   |  1 +
 2 files changed, 86 insertions(+)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index accae1b..1192436 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2813,6 +2813,85 @@ mask_relative_to(const fs_reg &r, const fs_reg &s, unsigned ds)
 }
 
 bool
+fs_visitor::opt_peephole_csel()
+{
+   if (devinfo->gen < 8)
+  return false;
+
+   bool progress = false;
+
+   foreach_block_reverse(block, cfg) {
+  int ip = block->end_ip + 1;
+
+  foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
+ ip--;
+
+ if (inst->opcode != BRW_OPCODE_SEL ||
+ inst->predicate == BRW_PREDICATE_NONE ||
+ (inst->dst.type != BRW_REGISTER_TYPE_F &&
+  inst->dst.type != BRW_REGISTER_TYPE_D &&
+  inst->dst.type != BRW_REGISTER_TYPE_UD))
+continue;
+
+ /* Because it is a 3-src instruction, CSEL cannot have any immediate
+  * values as sources.
+  */
+ if (inst->src[0].file != VGRF ||
+ (inst->src[1].file != VGRF && !inst->src[1].is_zero()))
+continue;
+
+ foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) 
{
+if (!scan_inst->flags_written())
+   continue;
+
+if ((scan_inst->opcode != BRW_OPCODE_CMP &&
+ scan_inst->opcode != BRW_OPCODE_MOV) ||
+scan_inst->predicate != BRW_PREDICATE_NONE ||
+scan_inst->src[0].file != VGRF ||
+scan_inst->src[0].type != BRW_REGISTER_TYPE_F)
+   break;
+
+if (scan_inst->opcode == BRW_OPCODE_CMP && 
!scan_inst->src[1].is_zero())
+   break;
+
+const brw::fs_builder ibld(this, block, inst);
+
+const enum brw_conditional_mod cond =
+   inst->predicate == BRW_PREDICATE_NORMAL
+   ? scan_inst->conditional_mod
+   : brw_swap_cmod(scan_inst->conditional_mod);
+
+fs_inst *csel_inst = NULL;
+
+if (inst->src[1].file == VGRF) {
+   csel_inst = ibld.CSEL(inst->dst,
+ inst->src[0],
+ inst->src[1],
+ scan_inst->src[0],
+ cond);
+} else if (cond == BRW_CONDITIONAL_NZ) {
+   csel_inst = ibld.CSEL(inst->dst,
+ inst->src[0],
+ scan_inst->src[0],
+ scan_inst->src[0],
+ cond);
+
+   /* This ensures that we get 0.0 and not -0.0. */
+   csel_inst->src[1].abs = true;
+}
+
+