Re: [Mesa-dev] [PATCH 19/22] i965/fs: Merge CMP and SEL into CSEL on Gen8+
Reviewed-by: Samuel Iglesias Gonsálvez
On 24/02/18 00:56, Ian Romanick wrote: > From: Ian Romanick > > Skylake > total instructions in shared programs: 14514547 -> 14503025 (-0.08%) > instructions in affected programs: 2008312 -> 1996790 (-0.57%) > helped: 5816 > HURT: 0 > helped stats (abs) min: 1 max: 27 x̄: 1.98 x̃: 1 > helped stats (rel) min: 0.03% max: 6.34% x̄: 0.90% x̃: 0.69% > 95% mean confidence interval for instructions value: -2.04 -1.92 > 95% mean confidence interval for instructions %-change: -0.92% -0.88% > Instructions are helped. > > total cycles in shared programs: 533136780 -> 532150352 (-0.19%) > cycles in affected programs: 347942196 -> 346955768 (-0.28%) > helped: 4796 > HURT: 767 > helped stats (abs) min: 1 max: 32000 x̄: 223.04 x̃: 10 > helped stats (rel) min: <.01% max: 24.02% x̄: 2.72% x̃: 0.74% > HURT stats (abs) min: 1 max: 5252 x̄: 108.59 x̃: 21 > HURT stats (rel) min: 0.01% max: 89.43% x̄: 2.50% x̃: 0.55% > 95% mean confidence interval for cycles value: -233.64 -120.99 > 95% mean confidence interval for cycles %-change: -2.13% -1.87% > Cycles are helped. > > LOST: 0 > GAINED: 1 > > Broadwell > total instructions in shared programs: 14808028 -> 14796582 (-0.08%) > instructions in affected programs: 2341840 -> 2330394 (-0.49%) > helped: 5802 > HURT: 0 > helped stats (abs) min: 1 max: 27 x̄: 1.97 x̃: 1 > helped stats (rel) min: 0.03% max: 6.25% x̄: 0.90% x̃: 0.69% > 95% mean confidence interval for instructions value: -2.03 -1.92 > 95% mean confidence interval for instructions %-change: -0.92% -0.88% > Instructions are helped. 
> > total cycles in shared programs: 559431004 -> 558393842 (-0.19%) > cycles in affected programs: 368801897 -> 367764735 (-0.28%) > helped: 4695 > HURT: 828 > helped stats (abs) min: 1 max: 32000 x̄: 230.83 x̃: 10 > helped stats (rel) min: <.01% max: 23.60% x̄: 2.61% x̃: 0.62% > HURT stats (abs) min: 1 max: 2538 x̄: 56.25 x̃: 14 > HURT stats (rel) min: <.01% max: 96.72% x̄: 2.88% x̃: 0.85% > 95% mean confidence interval for cycles value: -244.48 -131.10 > 95% mean confidence interval for cycles %-change: -1.92% -1.65% > Cycles are helped. > > No changes on earlier platforms. > > Signed-off-by: Ian Romanick > --- > src/intel/compiler/brw_fs.cpp | 85 > +++ > src/intel/compiler/brw_fs.h | 1 + > 2 files changed, 86 insertions(+) > > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp > index accae1b..1192436 100644 > --- a/src/intel/compiler/brw_fs.cpp > +++ b/src/intel/compiler/brw_fs.cpp > @@ -2813,6 +2813,85 @@ mask_relative_to(const fs_reg , const fs_reg , > unsigned ds) > } > > bool > +fs_visitor::opt_peephole_csel() > +{ > + if (devinfo->gen < 8) > + return false; > + > + bool progress = false; > + > + foreach_block_reverse(block, cfg) { > + int ip = block->end_ip + 1; > + > + foreach_inst_in_block_reverse_safe(fs_inst, inst, block) { > + ip--; > + > + if (inst->opcode != BRW_OPCODE_SEL || > + inst->predicate == BRW_PREDICATE_NONE || > + (inst->dst.type != BRW_REGISTER_TYPE_F && > + inst->dst.type != BRW_REGISTER_TYPE_D && > + inst->dst.type != BRW_REGISTER_TYPE_UD)) > +continue; > + > + /* Because it is a 3-src instruction, CSEL cannot have any immediate > + * values as sources. 
> + */ > + if (inst->src[0].file != VGRF || > + (inst->src[1].file != VGRF && !inst->src[1].is_zero())) > +continue; > + > + foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, > inst) { > +if (!scan_inst->flags_written()) > + continue; > + > +if ((scan_inst->opcode != BRW_OPCODE_CMP && > + scan_inst->opcode != BRW_OPCODE_MOV) || > +scan_inst->predicate != BRW_PREDICATE_NONE || > +scan_inst->src[0].file != VGRF || > +scan_inst->src[0].type != BRW_REGISTER_TYPE_F) > + break; > + > +if (scan_inst->opcode == BRW_OPCODE_CMP && > !scan_inst->src[1].is_zero()) > + break; > + > +const brw::fs_builder ibld(this, block, inst); > + > +const enum brw_conditional_mod cond = > + inst->predicate == BRW_PREDICATE_NORMAL > + ? scan_inst->conditional_mod > + : brw_swap_cmod(scan_inst->conditional_mod); > + > +fs_inst *csel_inst = NULL; > + > +if (inst->src[1].file == VGRF) { > + csel_inst = ibld.CSEL(inst->dst, > + inst->src[0], > + inst->src[1], > + scan_inst->src[0], > + cond); > +} else if (cond == BRW_CONDITIONAL_NZ) { > + csel_inst =
[Mesa-dev] [PATCH 19/22] i965/fs: Merge CMP and SEL into CSEL on Gen8+
From: Ian Romanick
Skylake total instructions in shared programs: 14514547 -> 14503025 (-0.08%) instructions in affected programs: 2008312 -> 1996790 (-0.57%) helped: 5816 HURT: 0 helped stats (abs) min: 1 max: 27 x̄: 1.98 x̃: 1 helped stats (rel) min: 0.03% max: 6.34% x̄: 0.90% x̃: 0.69% 95% mean confidence interval for instructions value: -2.04 -1.92 95% mean confidence interval for instructions %-change: -0.92% -0.88% Instructions are helped. total cycles in shared programs: 533136780 -> 532150352 (-0.19%) cycles in affected programs: 347942196 -> 346955768 (-0.28%) helped: 4796 HURT: 767 helped stats (abs) min: 1 max: 32000 x̄: 223.04 x̃: 10 helped stats (rel) min: <.01% max: 24.02% x̄: 2.72% x̃: 0.74% HURT stats (abs) min: 1 max: 5252 x̄: 108.59 x̃: 21 HURT stats (rel) min: 0.01% max: 89.43% x̄: 2.50% x̃: 0.55% 95% mean confidence interval for cycles value: -233.64 -120.99 95% mean confidence interval for cycles %-change: -2.13% -1.87% Cycles are helped. LOST: 0 GAINED: 1 Broadwell total instructions in shared programs: 14808028 -> 14796582 (-0.08%) instructions in affected programs: 2341840 -> 2330394 (-0.49%) helped: 5802 HURT: 0 helped stats (abs) min: 1 max: 27 x̄: 1.97 x̃: 1 helped stats (rel) min: 0.03% max: 6.25% x̄: 0.90% x̃: 0.69% 95% mean confidence interval for instructions value: -2.03 -1.92 95% mean confidence interval for instructions %-change: -0.92% -0.88% Instructions are helped. total cycles in shared programs: 559431004 -> 558393842 (-0.19%) cycles in affected programs: 368801897 -> 367764735 (-0.28%) helped: 4695 HURT: 828 helped stats (abs) min: 1 max: 32000 x̄: 230.83 x̃: 10 helped stats (rel) min: <.01% max: 23.60% x̄: 2.61% x̃: 0.62% HURT stats (abs) min: 1 max: 2538 x̄: 56.25 x̃: 14 HURT stats (rel) min: <.01% max: 96.72% x̄: 2.88% x̃: 0.85% 95% mean confidence interval for cycles value: -244.48 -131.10 95% mean confidence interval for cycles %-change: -1.92% -1.65% Cycles are helped. No changes on earlier platforms. 
Signed-off-by: Ian Romanick --- src/intel/compiler/brw_fs.cpp | 85 +++ src/intel/compiler/brw_fs.h | 1 + 2 files changed, 86 insertions(+) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index accae1b..1192436 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2813,6 +2813,85 @@ mask_relative_to(const fs_reg , const fs_reg , unsigned ds) } bool +fs_visitor::opt_peephole_csel() +{ + if (devinfo->gen < 8) + return false; + + bool progress = false; + + foreach_block_reverse(block, cfg) { + int ip = block->end_ip + 1; + + foreach_inst_in_block_reverse_safe(fs_inst, inst, block) { + ip--; + + if (inst->opcode != BRW_OPCODE_SEL || + inst->predicate == BRW_PREDICATE_NONE || + (inst->dst.type != BRW_REGISTER_TYPE_F && + inst->dst.type != BRW_REGISTER_TYPE_D && + inst->dst.type != BRW_REGISTER_TYPE_UD)) +continue; + + /* Because it is a 3-src instruction, CSEL cannot have any immediate + * values as sources. + */ + if (inst->src[0].file != VGRF || + (inst->src[1].file != VGRF && !inst->src[1].is_zero())) +continue; + + foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { +if (!scan_inst->flags_written()) + continue; + +if ((scan_inst->opcode != BRW_OPCODE_CMP && + scan_inst->opcode != BRW_OPCODE_MOV) || +scan_inst->predicate != BRW_PREDICATE_NONE || +scan_inst->src[0].file != VGRF || +scan_inst->src[0].type != BRW_REGISTER_TYPE_F) + break; + +if (scan_inst->opcode == BRW_OPCODE_CMP && !scan_inst->src[1].is_zero()) + break; + +const brw::fs_builder ibld(this, block, inst); + +const enum brw_conditional_mod cond = + inst->predicate == BRW_PREDICATE_NORMAL + ? 
scan_inst->conditional_mod + : brw_swap_cmod(scan_inst->conditional_mod); + +fs_inst *csel_inst = NULL; + +if (inst->src[1].file == VGRF) { + csel_inst = ibld.CSEL(inst->dst, + inst->src[0], + inst->src[1], + scan_inst->src[0], + cond); +} else if (cond == BRW_CONDITIONAL_NZ) { + csel_inst = ibld.CSEL(inst->dst, + inst->src[0], + scan_inst->src[0], + scan_inst->src[0], + cond); + + /* This ensures that we get 0.0 and not -0.0. */ + csel_inst->src[1].abs = true; +} + +