--- src/compiler/Makefile.sources | 1 + src/compiler/nir/nir.h | 2 + src/compiler/nir/nir_opt_loop_unroll.c | 394 +++++++++++++++++++++++++++++++++ 3 files changed, 397 insertions(+) create mode 100644 src/compiler/nir/nir_opt_loop_unroll.c
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 79de484..a9f104d 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -231,6 +231,7 @@ NIR_FILES = \ nir/nir_opt_dead_cf.c \ nir/nir_opt_gcm.c \ nir/nir_opt_global_to_local.c \ + nir/nir_opt_loop_unroll.c \ nir/nir_opt_peephole_select.c \ nir/nir_opt_remove_phis.c \ nir/nir_opt_undef.c \ diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 9083bd0..81d9dfc 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2676,6 +2676,8 @@ bool nir_opt_dead_cf(nir_shader *shader); void nir_opt_gcm(nir_shader *shader); +bool nir_opt_loop_unroll(nir_shader *shader); + bool nir_opt_peephole_select(nir_shader *shader); bool nir_opt_remove_phis(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c new file mode 100644 index 0000000..22530c9 --- /dev/null +++ b/src/compiler/nir/nir_opt_loop_unroll.c @@ -0,0 +1,394 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" +#include "nir_control_flow.h" + +typedef struct { + /* Array of loops */ + nir_loop *loops; + + /* Array of unroll factors */ + int *factors; +} unroll_vector; + +typedef struct { + /* Array of loop infos for the loop nest */ + nir_loop_info *li; + + /* List of unroll vectors */ + struct list_head unroll_vectors; + + nir_shader_compiler_options *options; +} loop_unroll_state; + +static void +extract_loop_body(nir_cf_list *extracted, nir_cf_node *node, nir_shader *ns) +{ + nir_cf_node *end = node; + while (!nir_cf_node_is_last(end)) + end = nir_cf_node_next(end); + + nir_cf_loop_list_extract(extracted, node, end); +} + +static void +clone_list(nir_shader *ns, nir_loop *loop, nir_cf_list *src_cf_list, + nir_cf_list *cloned_cf_list, struct hash_table *remap_table, + struct hash_table *phi_remap) +{ + /* Dest list needs to at least have one block */ + nir_block *nblk = nir_block_create(ns); + nblk->cf_node.parent = loop->cf_node.parent; + exec_list_push_tail(&cloned_cf_list->list, &nblk->cf_node.node); + + nir_clone_loop_list(&cloned_cf_list->list, &src_cf_list->list, + remap_table, phi_remap, ns); +} + +static void +remove_unrolled_loop(nir_cf_node *loop, nir_block *block_before_loop, + struct hash_table *remap_table, + struct hash_table *phi_remap, + nir_function_impl *impl) +{ + /* Fixup LCSSA-phi srcs */ + nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(loop)); + nir_cf_node *cf_node = nir_cf_node_next(loop); + assert(cf_node->type == nir_cf_node_block); + + nir_block *block = nir_cf_node_as_block(cf_node); + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_phi) { + nir_phi_instr *phi = nir_instr_as_phi(instr); + assert(phi->is_lcssa_phi); + + if (nir_cf_node_as_loop(loop)->info->trip_count != 0) { + nir_foreach_phi_src_safe(src, phi) { + /* Update predecessor */ + src->pred = prev_block; + + /* Update src */ + struct hash_entry *hte = + _mesa_hash_table_search(phi_remap, src->src.ssa); + if (hte) { + nir_src new_src = nir_src_for_ssa((nir_ssa_def *) hte->data); + nir_instr_rewrite_src(instr, &src->src, new_src); + } else { + /* If the LCSSA src was not a phi fallback to following the + * the remappings in the remap table. + */ + nir_ssa_def *ssa_def = src->src.ssa; + while ((hte = _mesa_hash_table_search(remap_table, + ssa_def))) { + ssa_def = (nir_ssa_def *) hte->data; + } + + if (ssa_def != src->src.ssa){ + nir_src new_src = nir_src_for_ssa(ssa_def); + nir_instr_rewrite_src(instr, &src->src, new_src); + } + } + } + } + nir_opt_remove_phi(phi); + } else { + /* There should be no more LCSSA-phis */ + break; + } + } + + /* After cloning the loop and fixing up the LCSSA-phis we need to remove + * any obsolete phis at the beginning of the new unrolled block. To avoid + * failing validation. + */ + nir_foreach_instr_safe(instr, block_before_loop) { + if (instr->type != nir_instr_type_phi) + continue; + + /* Replace phi */ + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_opt_remove_phi(phi); + } + + /* Remove the loop */ + nir_loop *nir_loop = nir_cf_node_as_loop(loop); + foreach_list_typed(nir_cf_node, child, node, &nir_loop->body) + nir_cleanup_cf_node(child, impl, false); + + exec_node_remove(&loop->node); + + stitch_blocks(prev_block, block); +} + +/** + * Unroll a loop which does not contain any jumps. For example, if the input + * is: + * + * (loop (...) ...instrs...) + * + * And the iteration count is 3, the output will be: + * + * ...instrs... ...instrs... ...instrs... + */ +static void +simple_unroll(nir_function *fn, nir_loop *loop, nir_builder *b) +{ + nir_shader *ns = fn->shader; + nir_block *prev_block = + nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); + + struct exec_node *loop_node = exec_list_get_head(&loop->body); + nir_cf_node *lp_header_cf_node = exec_node_data(nir_cf_node, loop_node, + node); + + struct hash_table *remap_table = + _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + struct hash_table *phi_remap = + _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + /* Get the loop header this contains a bunch of phis and the loops + * conditional. + */ + assert(lp_header_cf_node->type == nir_cf_node_block); + nir_block *loop_header_blk = nir_cf_node_as_block(lp_header_cf_node); + + /* Remove any phi preds that are from the loop itself nir_opt_remove_phi() + * will clean this up further for us. + */ + nir_foreach_instr_safe(instr, loop_header_blk) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_foreach_phi_src_safe(src, phi) { + /* Is the pred from the block itself? */ + if (src->pred->index > phi->instr.block->index && + src->pred->cf_node.parent == &loop->cf_node) { + + /* Store the phi dest and src to be used for remaping */ + _mesa_hash_table_insert(phi_remap, &phi->dest.ssa, src->src.ssa); + + list_del(&src->src.use_link); + exec_node_remove(&src->node); + } + } + } + + /* Skip over if node (loop condition) and get the real loop body we need + * to do this now because we are about to start messing with the control + * flow. + */ + nir_cf_node *if_node = nir_cf_node_next(lp_header_cf_node); + nir_cf_node *cf_node = nir_cf_node_next(if_node); + assert(cf_node->type == nir_cf_node_block); + + /* Pluck out the loop header */ + nir_cf_list lp_header; + nir_cf_extract(&lp_header, nir_before_cf_node(lp_header_cf_node), + nir_after_cf_node(lp_header_cf_node)); + + loop_node = exec_list_get_head(&loop->body); + lp_header_cf_node = exec_node_data(nir_cf_node, loop_node, node); + + /* Extract phis from loop header and place before the loop */ + nir_cf_list lp_header_phis; + nir_cf_loop_list_extract(&lp_header_phis, lp_header_cf_node, + lp_header_cf_node); + b->cursor = nir_before_cf_node(&loop->cf_node); + nir_cf_reinsert(&lp_header_phis, b->cursor); + + /* Pluck out the loop body */ + nir_cf_list loop_body; + extract_loop_body(&loop_body, cf_node, ns); + + /* Initialise remap table with phi remap table */ + struct hash_entry *hte; + hash_table_foreach(phi_remap, hte) { + _mesa_hash_table_insert(remap_table, hte->key, hte->data); + } + + /* Clone loop header and append to the loop body */ + if (loop->info->trip_count > 0) { + nir_cf_list cloned_header; + exec_list_make_empty(&cloned_header.list); + cloned_header.impl = loop_body.impl; + + clone_list(ns, loop, &lp_header, &cloned_header, remap_table, phi_remap); + + /* Append loop body with header instructions */ + struct exec_node *lp_body_node = exec_list_get_tail(&loop_body.list); + nir_cf_node *end = exec_node_data(nir_cf_node, lp_body_node, node); + nir_block *lp_body_blk = nir_cf_node_as_block(end); + + struct exec_node *lp_header_node = + exec_list_get_head(&cloned_header.list); + nir_block *lp_header_blk = + nir_cf_node_as_block(exec_node_data(nir_cf_node, lp_header_node, + node)); + + foreach_list_typed(nir_instr, instr, node, &lp_header_blk->instr_list) { + instr->block = lp_body_blk; + } + + exec_list_append(&lp_body_blk->instr_list, &lp_header_blk->instr_list); + } + + /* Reinsert original header before the loop */ + nir_cf_reinsert(&lp_header, b->cursor); + + nir_cf_list *prev_lp = &loop_body; + + /* Create temp array to store the cloned loop body as we unroll */ + nir_cf_list unrolled_lp_body[2]; + exec_list_make_empty(&unrolled_lp_body[0].list); + exec_list_make_empty(&unrolled_lp_body[1].list); + unrolled_lp_body[0].impl = loop_body.impl; + unrolled_lp_body[1].impl = loop_body.impl; + + for (unsigned i = 1; i < loop->info->trip_count; i++) { + + /* Clone the loop body */ + if (exec_list_is_empty(&unrolled_lp_body[0].list)){ + clone_list(ns, loop, prev_lp, &unrolled_lp_body[0], remap_table, + phi_remap); + + /* Insert unrolled loop body before the loop. */ + b->cursor = nir_before_cf_node(&loop->cf_node); + nir_cf_reinsert(prev_lp, b->cursor); + + prev_lp = &unrolled_lp_body[0]; + } else { + clone_list(ns, loop, prev_lp, &unrolled_lp_body[1], remap_table, + phi_remap); + + /* Insert unrolled loop body before the loop. */ + b->cursor = nir_before_cf_node(&loop->cf_node); + nir_cf_reinsert(prev_lp, b->cursor); + + prev_lp = &unrolled_lp_body[1]; + } + } + + b->cursor = nir_before_cf_node(&loop->cf_node); + if (loop->info->trip_count != 0) { + nir_cf_reinsert(prev_lp, b->cursor); + } else { + /* Its possible to end up with a trip count of zero. For example: + * + * int 0; + * do res = res.yzwx; while (++i < 1); + * + * In this case we simple delete the loop and fix up the LCSSA phis. + */ + nir_cf_delete(prev_lp); + } + + /* The loop has been unrolled so remove it. */ + remove_unrolled_loop(&loop->cf_node, prev_block, remap_table, phi_remap, + fn->impl); + + _mesa_hash_table_destroy(remap_table, NULL); + _mesa_hash_table_destroy(phi_remap, NULL); +} + +static bool +process_loops(nir_cf_node *cf_node, nir_builder *b, bool *innermost_loop) +{ + bool progress = false; + nir_loop *loop; + + switch (cf_node->type) { + case nir_cf_node_block: + return progress; + case nir_cf_node_if: { + nir_if *if_stmt = nir_cf_node_as_if(cf_node); + foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->then_list) + progress |= process_loops(nested_node, b, innermost_loop); + foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->else_list) + progress |= process_loops(nested_node, b, innermost_loop); + return progress; + } + case nir_cf_node_loop: { + loop = nir_cf_node_as_loop(cf_node); + foreach_list_typed_safe(nir_cf_node, nested_node, node, &loop->body) + progress |= process_loops(nested_node, b, innermost_loop); + break; + } + default: + unreachable("unknown cf node type"); + } + + if (*innermost_loop) { + nir_function *fn = nir_cf_node_get_function(&loop->cf_node)->function; + if (is_simple_for_loop(fn->shader, loop->info)) { + simple_unroll(fn, loop, b); + progress = true; + /* Don't unroll outer loops loops until after loop analysis have run + * again. + */ + *innermost_loop = false; + } else { + /* We couldn't unroll the innermost loop so don't try to unroll the + * outerloop. + */ + *innermost_loop = false; + } + } + + return progress; +} + +static bool +nir_opt_loop_unroll_impl(nir_function_impl *impl) +{ + bool progress = false; + nir_metadata_require(impl, nir_metadata_loop_analysis); + + nir_builder b; + nir_builder_init(&b, impl); + + foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) { + bool innermost_loop = true; + progress |= process_loops(node, &b, &innermost_loop); + } + + return progress; +} + +bool +nir_opt_loop_unroll(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (function->impl) { + progress |= nir_opt_loop_unroll_impl(function->impl); + } + } + return false; +} -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev