On Mon, Feb 9, 2015 at 5:50 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > On Wednesday, February 04, 2015 08:21:19 PM Matt Turner wrote: >> --- >> src/mesa/drivers/dri/i965/brw_cfg.cpp | 68 >> +++++++++++++++++++++- >> src/mesa/drivers/dri/i965/brw_cfg.h | 7 ++- >> .../drivers/dri/i965/brw_dead_control_flow.cpp | 5 +- >> 3 files changed, 75 insertions(+), 5 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp >> b/src/mesa/drivers/dri/i965/brw_cfg.cpp >> index ca5b01c..068d4d6 100644 >> --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp >> @@ -51,7 +51,7 @@ link(void *mem_ctx, bblock_t *block) >> } >> >> bblock_t::bblock_t(cfg_t *cfg) : >> - cfg(cfg), start_ip(0), end_ip(0), num(0) >> + cfg(cfg), idom(NULL), start_ip(0), end_ip(0), num(0) >> { >> instructions.make_empty(); >> parents.make_empty(); >> @@ -157,6 +157,7 @@ cfg_t::cfg_t(exec_list *instructions) >> block_list.make_empty(); >> blocks = NULL; >> num_blocks = 0; >> + idom_dirty = true; >> >> bblock_t *cur = NULL; >> int ip = 0; >> @@ -409,10 +410,13 @@ cfg_t::make_block_array() >> } >> >> void >> -cfg_t::dump(backend_visitor *v) const >> +cfg_t::dump(backend_visitor *v) >> { >> + if (idom_dirty) >> + calculate_idom(); >> + >> foreach_block (block, this) { >> - fprintf(stderr, "START B%d", block->num); >> + fprintf(stderr, "START B%d IDOM(B%d)", block->num, block->idom->num); >> foreach_list_typed(bblock_link, link, link, &block->parents) { >> fprintf(stderr, " <-B%d", >> link->block->num); >> @@ -428,3 +432,61 @@ cfg_t::dump(backend_visitor *v) const >> fprintf(stderr, "\n"); >> } >> } >> + >> +/* Calculates the immediate dominator of each block, according to "A Simple, >> + * Fast Dominance Algorithm" by Keith D. Cooper, Timothy J. Harvey, and Ken >> + * Kennedy. 
>> + * >> + * The authors claim that for control flow graphs of sizes normally >> encountered >> + * (less than 1000 nodes) that this algorithm is significantly faster than >> + * others like Lengauer-Tarjan. >> + */ >> +void >> +cfg_t::calculate_idom() >> +{ >> + foreach_block(block, this) { >> + block->idom = NULL; >> + } >> + blocks[0]->idom = blocks[0]; >> + >> + bool changed; >> + do { >> + changed = false; >> + >> + foreach_block(block, this) { >> + if (block->num == 0) >> + continue; >> + >> + bblock_t *new_idom = NULL; >> + foreach_list_typed(bblock_link, parent, link, &block->parents) { >> + if (parent->block->idom) { >> + if (new_idom == NULL) { >> + new_idom = parent->block; >> + } else if (parent->block->idom != NULL) { >> + new_idom = intersect(parent->block, new_idom); >> + } >> + } >> + } >> + >> + if (block->idom != new_idom) { >> + block->idom = new_idom; >> + changed = true; >> + } >> + } >> + } while (changed); >> + >> + idom_dirty = false; >> +} >> + >> +bblock_t * >> +cfg_t::intersect(bblock_t *b1, bblock_t *b2) >> +{ > > Be nice to your future self and leave a comment to the effect of: > > <mattst88> and in intersect the > vs the paper's < is due to the fact that we > number basic blocks backwards from what the paper expects, IIRC
Sure. I'll copy NIR's: /* Note, the comparisons here are the opposite of what the paper says * because we index blocks from beginning -> end (i.e. reverse * post-order) instead of post-order like they assume. */ >> + while (b1->num != b2->num) { >> + while (b1->num > b2->num) >> + b1 = b1->idom; >> + while (b2->num > b1->num) >> + b2 = b2->idom; >> + } >> + assert(b1); >> + return b1; >> +} >> diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h >> b/src/mesa/drivers/dri/i965/brw_cfg.h >> index 0b60fec..215f248 100644 >> --- a/src/mesa/drivers/dri/i965/brw_cfg.h >> +++ b/src/mesa/drivers/dri/i965/brw_cfg.h >> @@ -81,6 +81,7 @@ struct bblock_t { >> >> struct exec_node link; >> struct cfg_t *cfg; >> + struct bblock_t *idom; >> >> int start_ip; >> int end_ip; >> @@ -269,8 +270,10 @@ struct cfg_t { >> bblock_t *new_block(); >> void set_next_block(bblock_t **cur, bblock_t *block, int ip); >> void make_block_array(); >> + void calculate_idom(); >> + static bblock_t *intersect(bblock_t *b1, bblock_t *b2); >> >> - void dump(backend_visitor *v) const; >> + void dump(backend_visitor *v); >> #endif >> void *mem_ctx; >> >> @@ -278,6 +281,8 @@ struct cfg_t { >> struct exec_list block_list; >> struct bblock_t **blocks; >> int num_blocks; >> + >> + bool idom_dirty; >> }; >> >> /* Note that this is implemented with a double for loop -- break will >> diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp >> b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp >> index 03f838d..4d68e12 100644 >> --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp >> @@ -114,8 +114,11 @@ dead_control_flow_eliminate(backend_visitor *v) >> } >> } >> >> - if (progress) >> + if (progress) { >> v->invalidate_live_intervals(); >> >> + v->cfg->idom_dirty = true; >> + } >> + >> return progress; >> } >> > > I think that you need to invalidate this info in more places - anywhere > that adds/removes a block would be a safe approximation. 
Could we > invalidate it from cfg_t::remove_block() and whatever function adds a > block? Then we wouldn't have to teach optimization passes about this at > all, which would be really nice. > > With those two things fixed, > Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> Yeah, I'll remove the hunk from dead control flow and replace it with idom_dirty = true as the last statement in cfg_t::remove_block. Thanks! _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev