Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net> On Fri, Oct 27, 2017 at 5:05 PM, Francisco Jerez <curroje...@riseup.net> wrote:
> This makes the dataflow propagation logic of the copy propagation pass > more intelligent in cases where the destination of a copy is known to > be undefined for some incoming CFG edges, building upon the > definedness information provided by the last patch. Helps a few > programs, and avoids a handful shader-db regressions from the next > patch. > > shader-db results on ILK: > > total instructions in shared programs: 6541547 -> 6541523 (-0.00%) > instructions in affected programs: 360 -> 336 (-6.67%) > helped: 8 > HURT: 0 > > LOST: 0 > GAINED: 10 > > shader-db results on BDW: > > total instructions in shared programs: 8174323 -> 8173882 (-0.01%) > instructions in affected programs: 7730 -> 7289 (-5.71%) > helped: 5 > HURT: 2 > > LOST: 0 > GAINED: 4 > > shader-db results on SKL: > > total instructions in shared programs: 8185669 -> 8184598 (-0.01%) > instructions in affected programs: 10364 -> 9293 (-10.33%) > helped: 5 > HURT: 2 > > LOST: 0 > GAINED: 2 > --- > src/intel/compiler/brw_fs_copy_propagation.cpp | 50 > ++++++++++++++++++++++++-- > 1 file changed, 47 insertions(+), 3 deletions(-) > > diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp > b/src/intel/compiler/brw_fs_copy_propagation.cpp > index cb117396089..5897cff35be 100644 > --- a/src/intel/compiler/brw_fs_copy_propagation.cpp > +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp > @@ -36,9 +36,12 @@ > > #include "util/bitset.h" > #include "brw_fs.h" > +#include "brw_fs_live_variables.h" > #include "brw_cfg.h" > #include "brw_eu.h" > > +using namespace brw; > + > namespace { /* avoid conflict with opt_copy_propagation_elements */ > struct acp_entry : public exec_node { > fs_reg dst; > @@ -77,12 +80,19 @@ struct block_data { > * course of this block. > */ > BITSET_WORD *kill; > + > + /** > + * Which entries in the fs_copy_prop_dataflow acp table are guaranteed > to > + * have a fully uninitialized destination at the end of this block. > + */ > + BITSET_WORD *undef; > }; > > class fs_copy_prop_dataflow > { > public: > fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg, > + const fs_live_variables *live, > exec_list *out_acp[ACP_HASH_SIZE]); > > void setup_initial_values(); > @@ -92,6 +102,7 @@ public: > > void *mem_ctx; > cfg_t *cfg; > + const fs_live_variables *live; > > acp_entry **acp; > int num_acp; > @@ -102,8 +113,9 @@ public: > } /* anonymous namespace */ > > fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg, > + const fs_live_variables > *live, > exec_list > *out_acp[ACP_HASH_SIZE]) > - : mem_ctx(mem_ctx), cfg(cfg) > + : mem_ctx(mem_ctx), cfg(cfg), live(live) > { > bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks); > > @@ -124,6 +136,7 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void > *mem_ctx, cfg_t *cfg, > bd[block->num].liveout = rzalloc_array(bd, BITSET_WORD, > bitset_words); > bd[block->num].copy = rzalloc_array(bd, BITSET_WORD, bitset_words); > bd[block->num].kill = rzalloc_array(bd, BITSET_WORD, bitset_words); > + bd[block->num].undef = rzalloc_array(bd, BITSET_WORD, bitset_words); > > for (int i = 0; i < ACP_HASH_SIZE; i++) { > foreach_in_list(acp_entry, entry, &out_acp[block->num][i]) { > @@ -189,6 +202,18 @@ fs_copy_prop_dataflow::setup_initial_values() > } > } > } > + > + /* Initialize the undef set. */ > + foreach_block (block, cfg) { > + for (int i = 0; i < num_acp; i++) { > + BITSET_SET(bd[block->num].undef, i); > + for (unsigned off = 0; off < acp[i]->size_written; off += > REG_SIZE) { > + if (BITSET_TEST(live->block_data[block->num].defout, > + live->var_from_reg(byte_offset(acp[i]->dst, > off)))) > + BITSET_CLEAR(bd[block->num].undef, i); > + } > + } > + } > } > > /** > @@ -229,13 +254,30 @@ fs_copy_prop_dataflow::run() > > for (int i = 0; i < bitset_words; i++) { > const BITSET_WORD old_livein = bd[block->num].livein[i]; > + BITSET_WORD livein_from_any_block = 0; > > bd[block->num].livein[i] = ~0u; > foreach_list_typed(bblock_link, parent_link, link, > &block->parents) { > bblock_t *parent = parent_link->block; > - bd[block->num].livein[i] &= bd[parent->num].liveout[i]; > + /* Consider ACP entries with a known-undefined destination > to > + * be available from the parent. This is valid because > we're > + * free to set the undefined variable equal to the source > of > + * the ACP entry without breaking the application's > + * expectations, since the variable is undefined. > + */ > + bd[block->num].livein[i] &= (bd[parent->num].liveout[i] | > + bd[parent->num].undef[i]); > + livein_from_any_block |= bd[parent->num].liveout[i]; > } > > + /* Limit to the set of ACP entries that can possibly be > available > + * at the start of the block, since propagating from a > variable > + * which is guaranteed to be undefined (rather than > potentially > + * undefined for some dynamic control-flow paths) doesn't seem > + * particularly useful. > + */ > + bd[block->num].livein[i] &= livein_from_any_block; > + > if (old_livein != bd[block->num].livein[i]) > progress = true; > } > @@ -830,6 +872,8 @@ fs_visitor::opt_copy_propagation() > for (int i = 0; i < cfg->num_blocks; i++) > out_acp[i] = new exec_list [ACP_HASH_SIZE]; > > + calculate_live_intervals(); > + > /* First, walk through each block doing local copy propagation and > getting > * the set of copies available at the end of the block. > */ > @@ -839,7 +883,7 @@ fs_visitor::opt_copy_propagation() > } > > /* Do dataflow analysis for those available copies. */ > - fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, out_acp); > + fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, live_intervals, > out_acp); > > /* Next, re-run local copy propagation, this time with the set of > copies > * provided by the dataflow analysis available at the start of a block. > -- > 2.14.2 > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev