On Thu, Oct 6, 2016 at 10:22 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > > > On Wed, Oct 5, 2016 at 7:25 PM, Timothy Arceri < > timothy.arc...@collabora.com> wrote: > >> Just >> >> On Wed, 2016-10-05 at 16:23 -0700, Jason Ekstrand wrote: >> > >> > >> > On Thu, Sep 15, 2016 at 12:03 AM, Timothy Arceri <timothy.arceri@coll >> > abora.com> wrote: >> > > V2: >> > > - tidy ups suggested by Connor. >> > > - tidy up cloning logic and handle copy propagation >> > > based of suggestion by Connor. >> > > - use nir_ssa_def_rewrite_uses to fix up lcssa phis >> > > suggested by Connor. >> > > - add support for complex loop unrolling (two terminators) >> > > - handle case were the ssa defs use outside the loop is already a >> > > phi >> > > - support unrolling loops with multiple terminators when trip count >> > > is know for each terminator >> > > --- >> > > src/compiler/Makefile.sources | 1 + >> > > src/compiler/nir/nir.h | 2 + >> > > src/compiler/nir/nir_opt_loop_unroll.c | 820 >> > > +++++++++++++++++++++++++++++++++ >> > > 3 files changed, 823 insertions(+) >> > > create mode 100644 src/compiler/nir/nir_opt_loop_unroll.c >> > > >> > > diff --git a/src/compiler/Makefile.sources >> > > b/src/compiler/Makefile.sources >> > > index 8ef6080..b3512bb 100644 >> > > --- a/src/compiler/Makefile.sources >> > > +++ b/src/compiler/Makefile.sources >> > > @@ -233,6 +233,7 @@ NIR_FILES = \ >> > > nir/nir_opt_dead_cf.c \ >> > > nir/nir_opt_gcm.c \ >> > > nir/nir_opt_global_to_local.c \ >> > > + nir/nir_opt_loop_unroll.c \ >> > > nir/nir_opt_peephole_select.c \ >> > > nir/nir_opt_remove_phis.c \ >> > > nir/nir_opt_undef.c \ >> > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h >> > > index 9887432..0513d81 100644 >> > > --- a/src/compiler/nir/nir.h >> > > +++ b/src/compiler/nir/nir.h >> > > @@ -2661,6 +2661,8 @@ bool nir_opt_dead_cf(nir_shader *shader); >> > > >> > > bool nir_opt_gcm(nir_shader *shader, bool value_number); >> > > >> > > +bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode >> > > indirect_mask); >> > > + >> > > bool nir_opt_peephole_select(nir_shader *shader); >> > > >> > > bool nir_opt_remove_phis(nir_shader *shader); >> > > diff --git a/src/compiler/nir/nir_opt_loop_unroll.c >> > > b/src/compiler/nir/nir_opt_loop_unroll.c >> > > new file mode 100644 >> > > index 0000000..1de02f6 >> > > --- /dev/null >> > > +++ b/src/compiler/nir/nir_opt_loop_unroll.c >> > > @@ -0,0 +1,820 @@ >> > > +/* >> > > + * Copyright © 2016 Intel Corporation >> > > + * >> > > + * Permission is hereby granted, free of charge, to any person >> > > obtaining a >> > > + * copy of this software and associated documentation files (the >> > > "Software"), >> > > + * to deal in the Software without restriction, including without >> > > limitation >> > > + * the rights to use, copy, modify, merge, publish, distribute, >> > > sublicense, >> > > + * and/or sell copies of the Software, and to permit persons to >> > > whom the >> > > + * Software is furnished to do so, subject to the following >> > > conditions: >> > > + * >> > > + * The above copyright notice and this permission notice >> > > (including the next >> > > + * paragraph) shall be included in all copies or substantial >> > > portions of the >> > > + * Software. >> > > + * >> > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> > > EXPRESS OR >> > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >> > > MERCHANTABILITY, >> > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO >> > > EVENT SHALL >> > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, >> > > DAMAGES OR OTHER >> > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> > > ARISING >> > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> > > OTHER >> > > + * DEALINGS IN THE SOFTWARE. >> > > + */ >> > > + >> > > +#include "nir.h" >> > > +#include "nir_builder.h" >> > > +#include "nir_control_flow.h" >> > > + >> > > +static void >> > > +extract_loop_body(nir_cf_list *extracted, nir_cf_node *node) >> > >> > "node" is not particularly descriptive. Perhaps "start_node" or >> > something like that. >> > >> > > +{ >> > > + nir_cf_node *end = node; >> > > + while (!nir_cf_node_is_last(end)) >> > > + end = nir_cf_node_next(end); >> > >> > This bit of iteration seems unfortunate. If you have the loop >> > pointer, you can just do >> > >> > nir_cf_extract(extracted, nir_before_cf_node(node), >> > nir_after_cf_node(nir_loop_last_cf_node(loop)) >> > >> > For that matter, is the helper even needed? If you don't want to >> > type that much and want to keep the helper, you could easily get the >> > loop from node->parent. >> >> Sure this was one of the first bits I wrote before finding the nir >> helpers. Will see if I can just remove it. >> >> > >> > > + >> > > + nir_cf_extract(extracted, nir_before_cf_node(node), >> > > + nir_after_cf_node(end)); >> > > +} >> > > + >> > > +static void >> > > +clone_list(nir_shader *ns, nir_loop *loop, nir_cf_list >> > > *src_cf_list, >> > > + nir_cf_list *cloned_cf_list, struct hash_table >> > > *remap_table) >> > > +{ >> > > + /* Dest list needs to at least have one block */ >> > > + nir_block *nblk = nir_block_create(ns); >> > > + nblk->cf_node.parent = loop->cf_node.parent; >> > > + exec_list_push_tail(&cloned_cf_list->list, &nblk- >> > > >cf_node.node); >> > >> > Do you always want to do this or should it be guarded by "if >> > (exec_list_empty(&src_cf_list->list))"? >> >> I think I was relying on the clone to do that check but makes sense to >> do it here. Will add. >> >> > >> > > + >> > > + nir_clone_loop_list(&cloned_cf_list->list, &src_cf_list->list, >> > > + remap_table, ns); >> > > +} >> > > + >> > > +static void >> > > +move_cf_list_into_if(nir_cf_list *lst, nir_cf_node *if_node, >> > > + nir_cf_node *last_node, bool >> > > continue_from_then_branch) >> > > +{ >> > > + nir_if *if_stmt = nir_cf_node_as_if(if_node); >> > > + if (continue_from_then_branch) { >> > > + /* Move the rest of the loop inside the then */ >> > > + nir_cf_reinsert(lst, >> > > nir_after_cf_node(nir_if_last_then_node(if_stmt))); >> > > + } else { >> > > + /* Move the rest of the loop inside the else */ >> > > + nir_cf_reinsert(lst, >> > > nir_after_cf_node(nir_if_last_else_node(if_stmt))); >> > > + } >> > > + >> > > + /* Remove the break */ >> > > + >> > > nir_instr_remove(nir_block_last_instr(nir_cf_node_as_block(last_no >> > > de))); >> > > +} >> > > + >> > > +static bool >> > > +is_phi_src_phi_from_loop_header(nir_ssa_def *def, nir_ssa_def >> > > *src) >> > > +{ >> > > + return def->parent_instr->type == nir_instr_type_phi && >> > > + src->parent_instr->type == nir_instr_type_phi && >> > > + nir_instr_as_phi(def->parent_instr)->instr.block->index == >> > > + nir_instr_as_phi(src->parent_instr)->instr.block->index; >> > >> > This last check can be shortened to "def->parent_instr->block == src- >> > >parent_instr->block". Converting to a phi and then doing ->instr is >> > just redundant. Also, I'd rather compre block pointers than indices >> > if we can. >> >> Sure. >> >> > >> > > +} >> > > + >> > > +static void >> > > +get_table_of_lcssa_and_loop_term_phis(nir_cf_node *loop, >> > > + struct hash_table >> > > **lcssa_phis, >> > > + struct hash_table >> > > **loop_term_phis, >> > > + nir_if *loop_term_if) >> > > +{ >> > > + *lcssa_phis = _mesa_hash_table_create(NULL, >> > > _mesa_hash_pointer, >> > > + _mesa_key_pointer_equal); >> > > + *loop_term_phis = _mesa_hash_table_create(NULL, >> > > _mesa_hash_pointer, >> > > + >> > > _mesa_key_pointer_equal); >> > > + >> > > + nir_cf_node *cf_node = nir_cf_node_next(loop); >> > > + nir_block *block = nir_cf_node_as_block(cf_node); >> > > + nir_foreach_instr(instr, block) { >> > > + if (instr->type == nir_instr_type_phi) { >> > > + nir_phi_instr *phi = nir_instr_as_phi(instr); >> > > + >> > > + nir_foreach_phi_src(src, phi) { >> > > + nir_block *then_blk = >> > > + >> > > nir_cf_node_as_block(nir_if_last_then_node(loop_term_if)); >> > > + nir_block *else_blk = >> > > + >> > > nir_cf_node_as_block(nir_if_last_else_node(loop_term_if)); >> > > + >> > > + if (src->pred == then_blk || src->pred == else_blk) { >> > > + _mesa_hash_table_insert(*loop_term_phis, phi, src- >> > > >src.ssa); >> > > + } else { >> > > + _mesa_hash_table_insert(*lcssa_phis, phi, src- >> > > >src.ssa); >> > > + } >> > > + } >> > > + } else { >> > > + /* There should be no more phis */ >> > > + break; >> > > + } >> > > + } >> > > +} >> > > + >> > > +static void >> > > +create_remap_tables(nir_loop *loop, nir_block *loop_header_blk, >> > > + struct hash_table **remap_table, >> > > + struct hash_table **phi_remap, >> > > + struct hash_table **src_before_loop, >> > > + struct hash_table **src_after_loop) >> > > +{ >> > > + *remap_table = _mesa_hash_table_create(NULL, >> > > _mesa_hash_pointer, >> > > + >> > > _mesa_key_pointer_equal); >> > > + *phi_remap = _mesa_hash_table_create(NULL, _mesa_hash_pointer, >> > > + _mesa_key_pointer_equal); >> > > + *src_before_loop = _mesa_hash_table_create(NULL, >> > > _mesa_hash_pointer, >> > > + >> > > _mesa_key_pointer_equal); >> > > + *src_after_loop = _mesa_hash_table_create(NULL, >> > > _mesa_hash_pointer, >> > > + >> > > _mesa_key_pointer_equal); >> > > + >> > > + /* Build hash tables used for remapping as we unroll. */ >> > > + nir_foreach_instr(instr, loop_header_blk) { >> > > + if (instr->type != nir_instr_type_phi) >> > > + break; >> > > + >> > > + nir_phi_instr *phi = nir_instr_as_phi(instr); >> > > + nir_foreach_phi_src(src, phi) { >> > > + /* Is the pred from the block itself? */ >> > >> > I'm not sure what this comment is trying to say. The logic below >> > determines whether or not the predecessor is from some block inside >> > the loop but not the header block. >> >> I'll update this to make a bit more sense. >> >> > >> > > + if (src->pred->index > phi->instr.block->index && >> > > + src->pred->cf_node.parent == &loop->cf_node) { >> > > + >> > > + _mesa_hash_table_insert(*phi_remap, &phi->dest.ssa, >> > > src->src.ssa); >> > > + _mesa_hash_table_insert(*src_after_loop, &phi- >> > > >dest.ssa, >> > > + src->src.ssa); >> > > + } else { >> > >> > Perhaps assert(src->pred == start_bloc || src->pred == >> > block_before_loop) >> >> Sure. >> >> > >> > > + _mesa_hash_table_insert(*remap_table, &phi->dest.ssa, >> > > + src->src.ssa); >> > > + _mesa_hash_table_insert(*src_before_loop, &phi- >> > > >dest.ssa, >> > > + src->src.ssa); >> > > + } >> > > + } >> > > + } >> > > +} >> > > + >> > > +static void >> > > +update_remap_tables(bool is_first_iteration, struct hash_table >> > > *remap_table, >> > > + struct hash_table *phi_remap, >> > > + struct hash_table *src_before_loop, >> > > + struct hash_table *src_after_loop) >> > > +{ >> > > + struct hash_entry *phi_hte; >> > > + hash_table_foreach(phi_remap, phi_hte) { >> > > + struct hash_entry *remap_hte = >> > > + _mesa_hash_table_search(remap_table, phi_hte->data); >> > > + >> > > + nir_ssa_def *phi_def = (nir_ssa_def *) phi_hte->key; >> > > + nir_ssa_def *phi_src = (nir_ssa_def *) phi_hte->data; >> > > + >> > > + if (!remap_hte && is_first_iteration) { >> > > + _mesa_hash_table_insert(remap_table, phi_hte->key, >> > > phi_hte->data); >> > > + continue; >> > > + } >> > > + >> > > + if (is_phi_src_phi_from_loop_header(phi_def, phi_src)) { >> > > + /* After copy propagation we can end up with phis inside >> > > loops >> > > + * that look like this: >> > > + * >> > > + * vec1 32 ssa_14 = phi block_0: ssa_9, block_4: >> > > ssa_13 >> > > + * vec1 32 ssa_13 = phi block_0: ssa_8, block_4: >> > > ssa_12 >> > > + * vec1 32 ssa_12 = phi block_0: ssa_7, block_4: >> > > ssa_11 >> > > + * vec1 32 ssa_11 = phi block_0: ssa_6, block_4: >> > > ssa_14 >> > > + * >> > > + * For each iteration of the loop we need to update the >> > > phi and >> > > + * cloning remap tables so that we use the correct src >> > > for the >> > > + * next iteration. >> > > + */ >> > > + struct hash_entry *sbl_hte = >> > > + _mesa_hash_table_search(src_before_loop, phi_hte- >> > > >data); >> > > + _mesa_hash_table_insert(remap_table, phi_hte->key, >> > > sbl_hte->data); >> > > + >> > > + struct hash_entry *sal_hte = >> > > + _mesa_hash_table_search(src_after_loop, phi_hte- >> > > >data); >> > > + phi_hte->data = sal_hte->data; >> > > + } else if (remap_hte) { >> > > + _mesa_hash_table_insert(remap_table, phi_hte->key, >> > > remap_hte->data); >> > > + } >> > >> > Hrm... I understand the problem but I would have thought we could >> > have done it with fewer remap tables... My naeve idea would be to do >> > as follows: >> > >> > In create_remap_tables, populate the remap table with phi_dst -> >> > phi_src where phi_src is the source with pred == block_before_loop. >> > >> > In update_remap_tables, populate the remap table with phi_dst -> >> > remap(phi_src) where phi_src is the soure with pred == continue_block >> > >> > Am I just over-simplifying things here? >> >> I believe you are. The remap table is updated by the cloning process >> our job here is to simply make sure it is in the right state before we >> do the next clone. The key thing is that we are always cloning the >> original header/body (were are not cloning the previous clone). >> > > Ah, that last point was a big part of what I was missing. > > >> So the above code is acting more as a reset, or initialisation of the >> remap table for the next iteration of the loop (although we want to >> keep what it already contains). >> >> Essentially the code is doing what you are suggesting you are just >> leaving out the part where updating phi_src with the source from pred >> == continue_block only works for the first iteration. For the next >> iteration we use the value already in the remap table after the clone, >> or the case where there is no mov i.e no clone we need to set things up >> manually using the src_before_loop/src_after_loop hash tables. >> >> Does this make sense? >> > > Yes, but I still think you could make it simpler and/or more obvious if > you did it in terms of the phi nodes themselves rather than these tables > that you generate from the phi nodes (which is really just a less > deterministic list of nodes). Something like this perhaps: > > nir_foreach_instr(instr, header_block) { > if (instr->type != nir_instr_type_phi) > break; > > nir_phi_instr *phi = nir_instr_as_phi(instr); > > nir_foreach_phi_src(src, phi) { > if (src->pred != continue_block) > continue; > > struct hash_entry *src_remap = _mesa_hash_table_search(remap, > src->src.ssa); > if (src_remap) { > _mesa_hash_table_insert(remap, &phi->dst.ssa, src_remap->data); > } else { > _mesa_hash_table_insert(remap, &phi->dst.ssa, src->src.ssa); > } > } > } > > That seems much more obvious than hash tables which are actually lists. > Sure, you could argue that walking the sources of phis looks inefficient > but, given that this is the simple case, they're guaranteed to have exactly > 2 sources, so meh. > > It's entirely possible that I'm still missing something... >
And... I just realized what I'm missing... :-) The phis may end up mapping to other phis whose entries in the remap table we've already mutated. Really, we need to do it in two steps: One to compute the set of remaps and then a second step to apply them. So my algorithm would have to be modified as follows: Add a phi_remap table and, in the above code, do the _mesa_hash_table_insert operations on the phi_remap. Then, do struct hash_entry *phi_entry; hash_table_foreach(phi_entry, phi_remap) _mesa_hash_table_insert(remap, phi_entry->key, phi_entry->data); to apply the remaps. Still, I think that's more obvious than the 4 tables approach. --Jason > It would be nice if we could distill this into an easy to follow >> comment to put on the function itself, happy for your input to help get >> to something that not only I understand :) >> >> How about something like: >> >> /* >> * The remap_table is updated by the cloning process, our job here is >> * to simply make sure it is in the right state before we do the next >> * clone. The key thing is that we are always cloning the original >> * header/body (were are not cloning the previous clone). >> * >> * Since the cloning process is updating the remap_table we can think >> * of this function as acting more as a reset, or initialisation of the >> * remap table for the next iteration of the loop (although we want to >> * keep what it already contains). >> * >> * After the first iteration we use the value already in remap_table to >> * update the phi_src with the new src from the previous clone, or in >> * the case where there is no mov i.e no clone, we update the >> * remap_table using the src_before_loop/src_after_loop hash tables. >> */ >> >> > >> > > + } >> > > +} >> > > + >> > > +static void >> > > +insert_phi_and_set_block_on_uses(nir_builder *b, nir_phi_instr >> > > *phi_instr) >> > > +{ >> > > + nir_instr_insert(b->cursor, &phi_instr->instr); >> > > + >> > > + /* Now that we have inserted the phi fix up the block for its >> > > uses. */ >> > > + nir_foreach_use_safe(use_src, &phi_instr->dest.ssa) { >> > > + nir_phi_instr *use_phi = nir_instr_as_phi(use_src- >> > > >parent_instr); >> > > + >> > > + foreach_list_typed(nir_phi_src, src, node, &use_phi->srcs) { >> > > + if (!src->pred) >> > > + src->pred = phi_instr->dest.ssa.parent_instr->block; >> > > + } >> > > + } >> > > +} >> > > + >> > > +static nir_phi_instr * >> > > +create_complex_unroll_phi(nir_shader *ns, nir_phi_instr >> > > *prev_phi_instr) >> > > +{ >> > > + nir_phi_instr *new_phi = nir_phi_instr_create(ns); >> > > + nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1, >> > > + prev_phi_instr->dest.ssa.bit_size, NULL); >> > > + >> > > + /* Add the new phi as a src to the phi from the previous >> > > iteration */ >> > > + nir_phi_src *new_src = ralloc(prev_phi_instr, nir_phi_src); >> > > + new_src->src = nir_src_for_ssa(&new_phi->dest.ssa); >> > > + new_src->src.parent_instr = &prev_phi_instr->instr; >> > > + exec_list_push_tail(&prev_phi_instr->srcs, &new_src->node); >> > > + list_addtail(&new_src->src.use_link, &new_src->src.ssa->uses); >> > > + >> > > + return new_phi; >> > > +} >> > > + >> > > +static void >> > > +add_complex_unroll_phi_src(nir_ssa_def *phi_src, nir_phi_instr >> > > *phi_instr, >> > > + struct hash_table *remap_table, >> > > nir_block *blk) >> > > +{ >> > > + struct hash_entry *hte = >> > > + _mesa_hash_table_search(remap_table, phi_src); >> > > + >> > > + nir_phi_src *new_src = ralloc(phi_instr, nir_phi_src); >> > > + nir_ssa_def *ssa_def = hte ? (nir_ssa_def *) hte->data : >> > > phi_src; >> > > + new_src->pred = blk; >> > > + new_src->src = nir_src_for_ssa(ssa_def); >> > > + new_src->src.parent_instr = &phi_instr->instr; >> > > + list_addtail(&new_src->src.use_link, &new_src->src.ssa->uses); >> > > + >> > > + exec_list_push_tail(&phi_instr->srcs, &new_src->node); >> > > +} >> > > + >> > > +static void >> > > +simple_loop_fix_lcssa_phis(nir_cf_node *loop, struct hash_table >> > > *remap_table) >> > > +{ >> > > + nir_block *prev_block = >> > > nir_cf_node_as_block(nir_cf_node_prev(loop)); >> > > + nir_cf_node *cf_node = nir_cf_node_next(loop); >> > > + assert(cf_node->type == nir_cf_node_block); >> > > + >> > > + nir_block *block = nir_cf_node_as_block(cf_node); >> > > + nir_foreach_instr_safe(instr, block) { >> > > + if (instr->type == nir_instr_type_phi) { >> > > + nir_phi_instr *phi = nir_instr_as_phi(instr); >> > > + >> > > + nir_foreach_phi_src_safe(src, phi) { >> > > + /* Update predecessor */ >> > > + src->pred = prev_block; >> > > + >> > > + /* Update src */ >> > > + struct hash_entry *hte = >> > > + _mesa_hash_table_search(remap_table, src->src.ssa); >> > > + assert(hte || !phi->is_lcssa_phi); >> > > + if (hte) { >> > > + nir_src new_src = nir_src_for_ssa((nir_ssa_def *) >> > > hte->data); >> > > + if (phi->is_lcssa_phi || exec_list_length(&phi- >> > > >srcs) == 1) { >> > > + nir_ssa_def_rewrite_uses(&phi->dest.ssa, >> > > new_src); >> > > + } else { >> > > + nir_instr_rewrite_src(instr, &src->src, >> > > new_src); >> > > + } >> > > + } else { >> > > + /* If a non lcssa phi now only has 1 src rewrite >> > > its uses here. >> > > + * This avoids the src getting rewritten to an >> > > undefined def, >> > > + * which appears to be done in nir_cf_node_remove() >> > > when >> > > + * removing the loop. >> > > + */ >> > > + if (exec_list_length(&phi->srcs) == 1) { >> > > + struct exec_node *head = >> > > exec_list_get_head(&phi->srcs); >> > > + nir_phi_src *phi_src = >> > > exec_node_data(nir_phi_src, head, node); >> > > + nir_ssa_def_rewrite_uses(&phi->dest.ssa, >> > > phi_src->src); >> > > + } >> > > + } >> > > + } >> > > + if (phi->is_lcssa_phi || exec_list_length(&phi->srcs) == >> > > 1) >> > > + nir_instr_remove(&phi->instr); >> > >> > I'm not convinced that this logic is at all what we want. It seems >> > like what we want is more something along these lines: >> > >> > nir_foreach_phi_src(src, phi) { >> > if (src->pred != block_with_break_in_terminator) >> > continue; >> >> I was under the impression NIR never delays the use of phis so no phi >> following a loop should ever not meet this condition right? >> > > Right. For simple loops they should all be lcssa phis with exactly one > source. > > >> > >> > struct hash_entry *hte = _mesa_hash_table_search(remap_table, src- >> > >src.ssa); >> > if (hte) { >> > nir_ssa_def_rewrite_uses(&phi->dest.ssa, >> > nir_src_for_ssa((nir_ssa_def *)hte->data)); >> > } else { >> > nir_ssa_def_rewrite_uses(&phi->dest.ssa, src->src); >> > } >> > } >> > assert(list_is_empty(&phi->dest.ssa.uses)); >> > >> > What we really want for the phis after the loop is to take the value >> > that it would have taken coming from the break. >> >> Right. Which is what the code does but for simple loops there should >> only ever be a single exit point so your simpler code *should* work. >> I'll give it a try thanks. >> >> > >> > > + } else { >> > > + /* There should be no more LCSSA-phis */ >> > > + break; >> > > + } >> > > + } >> > > +} >> > > + >> > > +static bool >> > > +ends_in_break(nir_block *block) >> > > +{ >> > > + if (exec_list_is_empty(&block->instr_list)) >> > > + return false; >> > > + >> > > + nir_instr *instr = nir_block_last_instr(block); >> > > + return instr->type == nir_instr_type_jump && >> > > + nir_instr_as_jump(instr)->type == nir_jump_break; >> > > +} >> > > + >> > > +/** >> > > + * Unroll a loop which does not contain any jumps. For example, >> > > if the input >> > > + * is: >> > > + * >> > > + * (loop (...) ...instrs...) >> > > + * >> > > + * And the iteration count is 3, the output will be: >> > > + * >> > > + * ...instrs... ...instrs... ...instrs... >> > > + */ >> > > +static void >> > > +simple_unroll(nir_function *fn, nir_loop *loop, nir_builder *b) >> > > +{ >> > > + nir_shader *ns = fn->shader; >> > > + >> > > + /* Get the loop header this contains a bunch of phis and the >> > > loops >> > > + * conditional. >> > > + */ >> > > + nir_cf_node *lp_header_cf_node = nir_loop_first_cf_node(loop); >> > > + nir_block *loop_header_blk = >> > > nir_cf_node_as_block(lp_header_cf_node); >> > > + >> > > + struct hash_table *remap_table; >> > > + struct hash_table *phi_remap; >> > > + struct hash_table *src_before_loop; >> > > + struct hash_table *src_after_loop; >> > > + create_remap_tables(loop, loop_header_blk, &remap_table, >> > > &phi_remap, >> > > + &src_before_loop, &src_after_loop); >> > > + >> > > + /* Skip over loop terminator and get the loop body. */ >> > > + nir_cf_node *if_node = &loop->info->limiting_terminator->nif- >> > > >cf_node; >> > > + list_for_each_entry(nir_loop_terminator, terminator, >> > > + &loop->info->loop_terminator_list, >> > > loop_terminator_link) { >> > > + nir_cf_node *loop_node = &terminator->nif->cf_node; >> > > + >> > > + /* Remove all but the limiting terminator as we know the >> > > other exit >> > > + * conditions can never be met. >> > > + */ >> > > + if (loop_node != &loop->info->limiting_terminator->nif- >> > > >cf_node) { >> > > + nir_cf_node_remove(loop_node); >> > >> > Can we really do this? What happens if the terminator is >> > >> > if (...) { >> > /*stuff */ >> > break; >> > } else { >> > /* other stuff */ >> > } >> > >> > Answer (from IRC): That can never happen since loop analysis will >> > bail in that case. >> > >> > Also, is it actually necessary to delete the other terminators? Can >> > we just delete the breaks from them and trust dead_cf to clean them >> > up properly? >> >> I *think* we can do this (and I did a some stage) but to what ends? The >> code to do so would be more complex and it means we are cloning a whole >> bunch of extra if staments. >> > > Just thinking through things. :-) If we want to deal with non-simple > if's, that might be a good way to go. > > >> > >> > > + } >> > > + } >> > > + >> > > + nir_cf_node *cf_node = nir_cf_node_next(if_node); >> > > + >> > > + /* Pluck out the loop header */ >> > > + nir_cf_list lp_header; >> > > + nir_cf_extract(&lp_header, >> > > nir_before_cf_node(lp_header_cf_node), >> > > + nir_before_cf_node(if_node)); >> > > + >> > > + /* Pluck out the loop body */ >> > > + nir_cf_list loop_body; >> > > + extract_loop_body(&loop_body, cf_node); >> > >> > It might be clearer if this were >> > >> > nir_cf_extract(&loop_body, nir_after_cf_node(if_node), >> > nir_after_cf_node(nir_loop_last_cf_node(loop))); >> > >> > I'm not sure the extract_loop_body helper is actually helping you. >> >> Sure. >> >> > >> > > + >> > > + /* Clone the loop header */ >> > > + nir_cf_list cloned_header; >> > > + exec_list_make_empty(&cloned_header.list); >> > > + cloned_header.impl = loop_body.impl; >> > > + >> > > + clone_list(ns, loop, &lp_header, &cloned_header, remap_table); >> > >> > It would be cool if these 4 lines were just >> > >> > nir_cf_list_clone(&lp_header, &cloned_header, remap_table); >> >> Hmm. I'm not sure thats better. It would still be at least 2 lines: >> >> nir_cf_list cloned_header; >> nir_cf_list_clone(&lp_header, &cloned_header, remap_table); >> > > Sure, but it would also simplify the clone implementation a bit and a > cf_list is a well-defined thing in NIR where as an arbitrary exec_list with > things that we've been promissed are cf_nodes isn't so much. > > >> But now we are calling: >> >> exec_list_make_empty(&cloned_header.list); >> cloned_header.impl = loop_body.impl; >> >> Inside the loop below when we don't need to. Although I guess it's >> normally not going to be a huge number of iterations so I guess that >> its worth it for cleaner code. Ok I'm convinced I'll change it. >> >> > >> > > + >> > > + /* Insert cloned loop header before the loop */ >> > > + b->cursor = nir_before_cf_node(&loop->cf_node); >> > > + nir_cf_reinsert(&cloned_header, b->cursor); >> > > + >> > > + /* Create temp block to store the cloned loop body as we unroll >> > > */ >> > > + nir_cf_list unrolled_lp_body; >> > > + exec_list_make_empty(&unrolled_lp_body.list); >> > > + unrolled_lp_body.impl = loop_body.impl; >> > > + >> > > + /* Clone loop header and append to the loop body */ >> > > + for (unsigned i = 0; i < loop->info->trip_count; i++) { >> > > + /* Clone loop body */ >> > > + clone_list(ns, loop, &loop_body, &unrolled_lp_body, >> > > remap_table); >> > > + >> > > + update_remap_tables(i == 0, remap_table, phi_remap, >> > > src_before_loop, >> > > + src_after_loop); >> > > + >> > > + /* Insert unrolled loop body before the loop */ >> > > + b->cursor = nir_before_cf_node(&loop->cf_node); >> > > + nir_cf_reinsert(&unrolled_lp_body, b->cursor); >> > > + >> > > + /* Clone loop header */ >> > > + clone_list(ns, loop, &lp_header, &cloned_header, >> > > remap_table); >> > > + >> > > + /* Insert loop header after loop body */ >> > > + b->cursor = nir_before_cf_node(&loop->cf_node); >> > > + nir_cf_reinsert(&cloned_header, b->cursor); >> > > + } >> > > + >> > > + /* The loop has been unrolled so remove it. */ >> > > + simple_loop_fix_lcssa_phis(&loop->cf_node, remap_table); >> > > + >> > > + /* Remove the loop */ >> > > + nir_cf_node_remove(&loop->cf_node); >> > > + >> > > + /* Delete the original loop body & header */ >> > > + nir_cf_delete(&lp_header); >> > > + nir_cf_delete(&loop_body); >> > > + >> > > + _mesa_hash_table_destroy(remap_table, NULL); >> > > + _mesa_hash_table_destroy(phi_remap, NULL); >> > > + _mesa_hash_table_destroy(src_before_loop, NULL); >> > > + _mesa_hash_table_destroy(src_after_loop, NULL); >> > >> > I think I would be mildly happier if you used a ralloc context rather >> > than destroying them all manually. >> >> Yeah that would have saved a few crashes when writing the complex >> unroll code. Will do. >> >> > >> > > +} >> > > + >> > > +/** >> > > + * Unroll a loop with two exists when the trip count of one of the >> > > exits is >> > > + * unknown. If continue_from_then_branch is true, the loop is >> > > repeated only >> > > + * when the "then" branch of the if is taken; otherwise it is >> > > repeated only >> > > + * when the "else" branch of the if is taken. >> > > + * >> > > + * For example, if the input is: >> > > + * >> > > + * (loop (...) >> > > + * ...body... >> > > + * (if (cond) >> > > + * (...then_instrs...) >> > > + * (...else_instrs...))) >> > > + * >> > > + * And the iteration count is 3, and \c continue_from_then_branch >> > > is true, >> > > + * then the output will be: >> > > + * >> > > + * ...body... >> > > + * (if (cond) >> > > + * (...then_instrs... >> > > + * ...body... >> > > + * (if (cond) >> > > + * (...then_instrs... >> > > + * ...body... >> > > + * (if (cond) >> > > + * (...then_instrs...) >> > > + * (...else_instrs...))) >> > > + * (...else_instrs...))) >> > > + * (...else_instrs)) >> > > + */ >> > > +static void >> > > +complex_unroll(nir_function *fn, nir_loop *loop, nir_builder *b, >> > > + nir_cf_node *if_node, nir_cf_node *last_node, >> > > + bool continue_from_then_branch, bool >> > > limiting_term_second) >> > > +{ >> > > + nir_cf_node *limiting_trm = &loop->info->limiting_terminator- >> > > >nif->cf_node; >> > > + nir_cf_node *lp_header_cf_node = nir_loop_first_cf_node(loop); >> > > + nir_block *loop_header_blk = >> > > nir_cf_node_as_block(lp_header_cf_node); >> > > + >> > > + struct hash_table *remap_table; >> > > + struct hash_table *phi_remap; >> > > + struct hash_table *src_before_loop; >> > > + struct hash_table *src_after_loop; >> > > + create_remap_tables(loop, loop_header_blk, &remap_table, >> > > &phi_remap, >> > > + &src_before_loop, &src_after_loop); >> > > + >> > > + struct hash_table *loop_phis; >> > > + struct hash_table *loop_term_phis; >> > > + get_table_of_lcssa_and_loop_term_phis(&loop->cf_node, >> > > &loop_phis, >> > > + &loop_term_phis, >> > > + loop->info- >> > > >limiting_terminator->nif); >> > > + >> > > + if (limiting_term_second) { >> > > + /* We need some special handling when its the second >> > > terminator causing >> > > + * us to exit the loop for example: >> > > + * >> > > + * for (int i = 0; i < uniform_lp_count; i++) { >> > > + * colour = vec4(0.0, 1.0, 0.0, 1.0); >> > > + * >> > > + * if (i == 1) >> > > + * break; >> > > + * } >> > > + * ... any further code is unreachable after i == 1 ... >> > > + * } >> > > + * >> > > + * Bump the trip count by one so we actually clone >> > > something. Also >> > > + * extract everything after the limiting terminator and >> > > insert it into >> > > + * the branch we will continue from. >> > > + */ >> > > + loop->info->trip_count++; >> > > + >> > > + nir_cf_list after_lt; >> > > + extract_loop_body(&after_lt, >> > > nir_cf_node_next(limiting_trm)); >> > > + >> > > + nir_if *if_stmt = loop->info->limiting_terminator->nif; >> > > + nir_cf_node *last_then = nir_if_last_then_node(if_stmt); >> > > + if (last_then->type == nir_cf_node_block && >> > > + ends_in_break(nir_cf_node_as_block(last_then))) { >> > > + move_cf_list_into_if(&after_lt, limiting_trm, last_then, >> > > false); >> > > + } else { >> > > + nir_cf_node *last_else = nir_if_last_else_node(if_stmt); >> > > + if (last_else->type == nir_cf_node_block && >> > > + ends_in_break(nir_cf_node_as_block(last_else))) { >> > > + move_cf_list_into_if(&after_lt, limiting_trm, >> > > last_else, true); >> > > + } >> > > + } >> > > + } else { >> > > + /* Remove the limiting terminator. Loop analysis will only >> > > find a >> > > + * terminator for trival if statments (then only contains >> > > break, else >> > > + * is empty) so its safe to remove the whole thing. >> > > + */ >> > > + nir_cf_node_remove(limiting_trm); >> > > + } >> > > + >> > > + nir_shader *ns = fn->shader; >> > > + struct hash_table *lcssa_phis = >> > > + _mesa_hash_table_create(NULL, _mesa_hash_pointer, >> > > + _mesa_key_pointer_equal); >> > > + >> > > + /* Create phis to be used post-if (replacements for the post- >> > > loop phis) */ >> > > + struct hash_entry *phi_hte; >> > > + hash_table_foreach(loop_phis, phi_hte) { >> > > + nir_phi_instr *phi_instr = (nir_phi_instr *) phi_hte->key; >> > > + nir_phi_instr *new_phi = create_complex_unroll_phi(ns, >> > > phi_instr); >> > > + >> > > + nir_ssa_def *ssa_def = (nir_ssa_def *) phi_hte->data; >> > > + _mesa_hash_table_insert(lcssa_phis, new_phi, ssa_def); >> > > + >> > > + /* Update loop_phis to point to the replacement phi */ >> > > + phi_hte->data = &new_phi->dest.ssa; >> > > + >> > > + struct hash_entry *loop_term_hte = >> > > + _mesa_hash_table_search(loop_term_phis, phi_hte->key); >> > > + if (loop_term_hte) { >> > > + _mesa_hash_table_insert(loop_term_phis, new_phi, >> > > loop_term_hte->data); >> > > + _mesa_hash_table_remove(loop_term_phis, loop_term_hte); >> > > + } >> > > + } >> > > + >> > > + /* Move everything after the terminator we don't have a trip >> > > count for >> > > + * inside the if. >> > > + */ >> > > + nir_cf_list loop_end; >> > > + extract_loop_body(&loop_end, nir_cf_node_next(if_node)); >> > > + nir_if *if_stmt = nir_cf_node_as_if(if_node); >> > > + move_cf_list_into_if(&loop_end, if_node, last_node, >> > > + continue_from_then_branch); >> > > + >> > > + /* Pluck out the loop body. Unlike the simple unroll pass there >> > > are no >> > > + * breaks remaining in the loop so we do not have the concept >> > > of a loop >> > > + * header and a loop body, instead we just extract everything. >> > > + */ >> > > + nir_cf_list loop_body; >> > > + extract_loop_body(&loop_body, lp_header_cf_node); >> > > + >> > > + /* Create temp block to store the cloned loop body as we unroll >> > > */ >> > > + nir_cf_list unrolled_lp_body; >> > > + exec_list_make_empty(&unrolled_lp_body.list); >> > > + unrolled_lp_body.impl = loop_body.impl; >> > > + >> > > + /* Set the cursor to before the loop */ >> > > + b->cursor = nir_before_cf_node(&loop->cf_node); >> > > + >> > > + nir_cf_node *continue_from_node = NULL; >> > > + for (unsigned i = 0; i < loop->info->trip_count; i++) { >> > > + /* Clone loop body */ >> > > + clone_list(ns, loop, &loop_body, &unrolled_lp_body, >> > > remap_table); >> > > + >> > > + nir_cf_node *last_node = >> > > + exec_node_data(nir_cf_node, >> > > + >> > > exec_list_get_tail(&unrolled_lp_body.list), node); >> > > + assert(last_node->type == nir_cf_node_block && >> > > + exec_list_is_empty(&nir_cf_node_as_block(last_node)- >> > > >instr_list)); >> > > + >> > > + /* Insert unrolled loop body */ >> > > + nir_cf_reinsert(&unrolled_lp_body, b->cursor); >> > > + >> > > + nir_cf_node *if_node = nir_cf_node_prev(last_node); >> > > + assert(if_node->type == nir_cf_node_if); >> > > + if_stmt = nir_cf_node_as_if(if_node); >> > > + >> > > + nir_cf_node *exit_from_node; >> > > + if (continue_from_then_branch) { >> > > + continue_from_node = nir_if_last_then_node(if_stmt); >> > > + exit_from_node = nir_if_last_else_node(if_stmt); >> > > + } else { >> > > + exit_from_node = nir_if_last_then_node(if_stmt); >> > > + continue_from_node = nir_if_last_else_node(if_stmt); >> > > + } >> > > + >> > > + b->cursor = nir_after_cf_node(if_node); >> > > + if (i < loop->info->trip_count - 1) { >> > > + struct hash_table *tmp = >> > > + _mesa_hash_table_create(NULL, _mesa_hash_pointer, >> > > + _mesa_key_pointer_equal); >> > > + >> > > + struct hash_entry *phi_hte; >> > > + hash_table_foreach(lcssa_phis, phi_hte) { >> > > + /* Insert phi created in previous iteration */ >> > > + nir_phi_instr *phi_instr = (nir_phi_instr *) phi_hte- >> > > >key; >> > > + insert_phi_and_set_block_on_uses(b, phi_instr); >> > > + >> > > + nir_ssa_def *ssa_def = (nir_ssa_def *) phi_hte->data; >> > > + add_complex_unroll_phi_src(ssa_def, phi_instr, >> > > remap_table, >> > > + >> > > nir_cf_node_as_block(exit_from_node)); >> > > + >> > > + /* Create phi to be fixed up by next iteration */ >> > > + nir_phi_instr *new_phi = create_complex_unroll_phi(ns, >> > > phi_instr); >> > > + _mesa_hash_table_insert(tmp, new_phi, ssa_def); >> > > + >> > > + struct hash_entry *loop_term_hte = >> > > + _mesa_hash_table_search(loop_term_phis, phi_hte- >> > > >key); >> > > + if (loop_term_hte) { >> > > + _mesa_hash_table_insert(loop_term_phis, new_phi, >> > > + loop_term_hte->data); >> > > + _mesa_hash_table_remove(loop_term_phis, >> > > loop_term_hte); >> > > + } >> > > + } >> > > + >> > > + /* Now that the phis have been processed replace the >> > > table with the >> > > + * phis to be fixed up in the next iteration. >> > > + */ >> > > + _mesa_hash_table_destroy(lcssa_phis, NULL); >> > > + lcssa_phis = tmp; >> > > + } else { >> > > + struct hash_entry *phi_hte; >> > > + hash_table_foreach(lcssa_phis, phi_hte) { >> > > + /* Insert phi created in previous iteration */ >> > > + nir_phi_instr *phi_instr = (nir_phi_instr *) phi_hte- >> > > >key; >> > > + insert_phi_and_set_block_on_uses(b, phi_instr); >> > > + >> > > + nir_ssa_def *ssa_def = (nir_ssa_def *) phi_hte->data; >> > > + add_complex_unroll_phi_src(ssa_def, phi_instr, >> > > remap_table, >> > > + >> > > nir_cf_node_as_block(exit_from_node)); >> > > + } >> > > + } >> > > + >> > > + /* Ready the remap tables for the next iteration */ >> > > + update_remap_tables(i == 0, remap_table, phi_remap, >> > > src_before_loop, >> > > + src_after_loop); >> > > + >> > > + /* Set the cursor to the last if in the loop body we just >> > > unrolled ready >> > > + * for the next iteration. >> > > + */ >> > > + b->cursor = nir_after_cf_node(continue_from_node); >> > > + } >> > > + >> > > + /* Now that the remap table is updated add the second src to >> > > the innermost >> > > + * phis. >> > > + */ >> > > + hash_table_foreach(lcssa_phis, phi_hte) { >> > > + nir_phi_instr *phi_instr = (nir_phi_instr *) phi_hte->key; >> > > + nir_ssa_def *phi_src = (nir_ssa_def *) phi_hte->data; >> > > + >> > > + assert(exec_list_length(&phi_instr->srcs) == 1); >> > > + >> > > + /* Get the src for when exiting by the loop terminator */ >> > > + struct hash_entry *loop_term_hte = >> > > + _mesa_hash_table_search(loop_term_phis, phi_instr); >> > > + if (loop_term_hte) >> > > + phi_src = (nir_ssa_def *) loop_term_hte->data; >> > > + >> > > + add_complex_unroll_phi_src(phi_src, phi_instr, remap_table, >> > > + >> > > nir_cf_node_as_block(continue_from_node)); >> > > + } >> > > + >> > > + /* Rewrite the uses of the old loop phis */ >> > > + hash_table_foreach(loop_phis, phi_hte) { >> > > + nir_phi_instr *phi_instr = (nir_phi_instr *) phi_hte->key; >> > > + >> > > + nir_foreach_use_safe(use_src, &phi_instr->dest.ssa) { >> > > + nir_src new_src = nir_src_for_ssa((nir_ssa_def *) >> > > phi_hte->data); >> > > + nir_instr_rewrite_src(use_src->parent_instr, use_src, >> > > new_src); >> > > + } >> > > + >> > > + nir_foreach_if_use_safe(use_src, &phi_instr->dest.ssa) { >> > > + nir_src new_src = nir_src_for_ssa((nir_ssa_def *) >> > > phi_hte->data); >> > > + nir_if_rewrite_condition(use_src->parent_if, new_src); >> > > + } >> > > + } >> > > + >> > > + /* The loop has been unrolled so remove it. */ >> > > + nir_cf_node_remove(&loop->cf_node); >> > > + >> > > + /* Delete the original loop body */ >> > > + nir_cf_delete(&loop_body); >> > > + >> > > + _mesa_hash_table_destroy(loop_phis, NULL); >> > > + _mesa_hash_table_destroy(loop_term_phis, NULL); >> > > + _mesa_hash_table_destroy(lcssa_phis, NULL); >> > > + _mesa_hash_table_destroy(remap_table, NULL); >> > > + _mesa_hash_table_destroy(phi_remap, NULL); >> > > + _mesa_hash_table_destroy(src_before_loop, NULL); >> > > + _mesa_hash_table_destroy(src_after_loop, NULL); >> > >> > ralloc context? >> > >> > > +} >> > > + >> > > +static bool >> > > +process_loops(nir_cf_node *cf_node, nir_builder *b, bool >> > > *innermost_loop) >> > > +{ >> > > + bool progress = false; >> > > + nir_loop *loop; >> > > + >> > > + switch (cf_node->type) { >> > > + case nir_cf_node_block: >> > > + return progress; >> > > + case nir_cf_node_if: { >> > > + nir_if *if_stmt = nir_cf_node_as_if(cf_node); >> > > + foreach_list_typed_safe(nir_cf_node, nested_node, node, >> > > &if_stmt->then_list) >> > > + progress |= process_loops(nested_node, b, >> > > innermost_loop); >> > > + foreach_list_typed_safe(nir_cf_node, nested_node, node, >> > > &if_stmt->else_list) >> > > + progress |= process_loops(nested_node, b, >> > > innermost_loop); >> > > + return progress; >> > > + } >> > > + case nir_cf_node_loop: { >> > > + loop = nir_cf_node_as_loop(cf_node); >> > > + foreach_list_typed_safe(nir_cf_node, nested_node, node, >> > > &loop->body) >> > > + progress |= process_loops(nested_node, b, >> > > innermost_loop); >> > > + break; >> > > + } >> > > + default: >> > > + unreachable("unknown cf node type"); >> > > + } >> > > + >> > > + if (*innermost_loop) { >> > > + nir_function *fn = nir_cf_node_get_function(&loop->cf_node)- >> > > >function; >> > > + >> > > + /* Don't attempt to unroll outer loops or a second inner >> > > loop in >> > > + * this pass wait until the next pass as we have altered the >> > > cf. >> > > + */ >> > > + *innermost_loop = false; >> > > + >> > > + if (loop->info->limiting_terminator == NULL) { >> > > + return progress; >> > > + } >> > > + >> > > + if (is_simple_loop(fn->shader, loop->info)) { >> > > + simple_unroll(fn, loop, b); >> > > + progress = true; >> > > + } else { >> > > + /* Attempt to unroll loops with two terminators. */ >> > > + if (is_complex_loop(fn->shader, loop->info)) { >> > > + bool first_terminator = true; >> > > + list_for_each_entry(nir_loop_terminator, terminator, >> > > + &loop->info->loop_terminator_list, >> > > + loop_terminator_link) { >> > > + >> > > + nir_cf_node *if_node = &terminator->nif->cf_node; >> > > + >> > > + if (if_node == &loop->info->limiting_terminator- >> > > >nif->cf_node) { >> > > + first_terminator = false; >> > > + continue; >> > > + } >> > > + >> > > + /* If the first terminator has a trip count of zero >> > > just do a >> > > + * simple unroll as the second terminator can never >> > > be reached. >> > > + */ >> > > + if (loop->info->trip_count == 0 && >> > > first_terminator) { >> > > + simple_unroll(fn, loop, b); >> > > + progress = true; >> > > + break; >> > > + } >> > > + >> > > + nir_if *if_stmt = nir_cf_node_as_if(if_node); >> > > + >> > > + /* Determine which if-statement branch, if any, >> > > ends with a >> > > + * break. Note that since predicted_num_loop_jumps >> > > == 1, it is >> > > + * impossible for both branches to end with a >> > > break. >> > > + */ >> > > + nir_cf_node *last_then = >> > > nir_if_last_then_node(if_stmt); >> > > + if (last_then->type == nir_cf_node_block && >> > > + ends_in_break(nir_cf_node_as_block(last_then))) >> > > { >> > > + >> > > + complex_unroll(fn, loop, b, if_node, last_then, >> > > false, >> > > + !first_terminator); >> > > + >> > > + progress = true; >> > > + break; >> > > + } else { >> > > + nir_cf_node *last_else = >> > > nir_if_last_else_node(if_stmt); >> > > + if (last_else->type == nir_cf_node_block && >> > > + >> > > ends_in_break(nir_cf_node_as_block(last_else))) { >> > > + >> > > + complex_unroll(fn, loop, b, if_node, >> > > last_else, true, >> > > + !first_terminator); >> > > + >> > > + progress = true; >> > > + break; >> > > + } >> > > + } >> > > + } >> > > + } >> > > + } >> > > + } >> > > + >> > > + return progress; >> > > +} >> > > + >> > > +static bool >> > > +nir_opt_loop_unroll_impl(nir_function_impl *impl, >> > > + nir_variable_mode indirect_mask) >> > > +{ >> > > + bool progress = false; >> > > + nir_metadata_require(impl, nir_metadata_loop_analysis, >> > > indirect_mask); >> > > + >> > > + nir_builder b; >> > > + nir_builder_init(&b, impl); >> > > + >> > > + foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) { >> > > + bool innermost_loop = true; >> > > + progress |= process_loops(node, &b, &innermost_loop); >> > > + } >> > > + >> > > + return progress; >> > > +} >> > > + >> > > +bool >> > > +nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode >> > > indirect_mask) >> > > +{ >> > > + bool progress = false; >> > > + >> > > + nir_foreach_function(function, shader) { >> > > + if (function->impl) { >> > > + progress |= nir_opt_loop_unroll_impl(function->impl, >> > > indirect_mask); >> > > + } >> > > + } >> > > + return false; >> > > +} >> > > -- >> > > 2.7.4 >> > > >> > > _______________________________________________ >> > > mesa-dev mailing list >> > > mesa-dev@lists.freedesktop.org >> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > > >> > >> > _______________________________________________ >> > mesa-dev mailing list >> > mesa-dev@lists.freedesktop.org >> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev