On Wed, Jul 4, 2018 at 9:13 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > Many fragment shaders do a discard using relatively little information > but still put the discard fairly far down in the shader for no good > reason. If the discard is moved higher up, we can possibly avoid doing > some or almost all of the work in the shader. When this lets us skip > texturing operations, it's an especially high win. > > One of the biggest offenders here is DXVK. The D3D APIs have different > rules for discards than OpenGL and Vulkan. One effective way (which is > what DXVK uses) to implement DX behavior on top of GL or Vulkan is to > wait until the very end of the shader to discard. This ends up in the > pessimal case where we always do all of the work before discarding. > This pass helps some DXVK shaders significantly. > --- > src/compiler/Makefile.sources | 1 + > src/compiler/nir/meson.build | 1 + > src/compiler/nir/nir.h | 10 + > src/compiler/nir/nir_opt_discard.c | 404 +++++++++++++++++++++++++++++ > 4 files changed, 416 insertions(+) > create mode 100644 src/compiler/nir/nir_opt_discard.c > > diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources > index 9e3fbdc2612..8600ce81281 100644 > --- a/src/compiler/Makefile.sources > +++ b/src/compiler/Makefile.sources > @@ -271,6 +271,7 @@ NIR_FILES = \ > nir/nir_opt_cse.c \ > nir/nir_opt_dce.c \ > nir/nir_opt_dead_cf.c \ > + nir/nir_opt_discard.c \ > nir/nir_opt_gcm.c \ > nir/nir_opt_global_to_local.c \ > nir/nir_opt_if.c \ > diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build > index 28aa8de7014..e339258bb94 100644 > --- a/src/compiler/nir/meson.build > +++ b/src/compiler/nir/meson.build > @@ -156,6 +156,7 @@ files_libnir = files( > 'nir_opt_cse.c', > 'nir_opt_dce.c', > 'nir_opt_dead_cf.c', > + 'nir_opt_discard.c', > 'nir_opt_gcm.c', > 'nir_opt_global_to_local.c', > 'nir_opt_if.c', > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > index 92ab3a699cc..44006dac160 
100644 > --- a/src/compiler/nir/nir.h > +++ b/src/compiler/nir/nir.h > @@ -2022,6 +2022,13 @@ typedef struct nir_shader_compiler_options { > */ > bool vs_inputs_dual_locations; > > + /** > + * Whether or not derivatives are still a safe operation after a discard > + * has occurred. Optimization passes may be able to be a bit more > + * aggressive if this is true. > + */ > + bool derivatives_safe_after_discard; > + > unsigned max_unroll_iterations; > } nir_shader_compiler_options; > > @@ -2899,6 +2906,9 @@ bool nir_opt_dce(nir_shader *shader); > > bool nir_opt_dead_cf(nir_shader *shader); > > +bool nir_opt_discard_if(nir_shader *shader); > +bool nir_opt_move_discards_to_top(nir_shader *shader); > > bool nir_opt_gcm(nir_shader *shader, bool value_number); > > bool nir_opt_if(nir_shader *shader); > diff --git a/src/compiler/nir/nir_opt_discard.c > b/src/compiler/nir/nir_opt_discard.c > new file mode 100644 > index 00000000000..eb1a8296282 > --- /dev/null > +++ b/src/compiler/nir/nir_opt_discard.c > @@ -0,0 +1,404 @@ > +/* > + * Copyright © 2014 Intel Corporation
Should this be 2018? This is a brand-new file, so the copyright year looks like a copy-paste leftover. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. 
> + * > + * Authors: > + * Jason Ekstrand (ja...@jlekstrand.net) > + * > + */ > + > +#include "nir.h" > +#include "nir_builder.h" > +#include "nir_control_flow.h" > +#include "nir_worklist.h" > + > +static bool > +block_has_only_discard(nir_block *block) > +{ > + nir_instr *instr = nir_block_first_instr(block); > + if (instr == NULL || instr != nir_block_last_instr(block)) > + return false; > + > + if (instr->type != nir_instr_type_intrinsic) > + return false; > + > + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); > + return intrin->intrinsic == nir_intrinsic_discard; > +} > + > +static bool > +opt_discard_if_impl(nir_function_impl *impl) > +{ > + bool progress = false; > + > + nir_builder b; > + nir_builder_init(&b, impl); > + > + nir_foreach_block(block, impl) { > + nir_if *nif = nir_block_get_following_if(block); > + if (!nif) > + continue; > + > + bool discard_in_then; > + if (block_has_only_discard(nir_if_first_then_block(nif))) > + discard_in_then = true; > + else if (block_has_only_discard(nir_if_first_else_block(nif))) > + discard_in_then = false; > + else > + continue; > + > + b.cursor = nir_after_block(block); > + nir_ssa_def *cond = nir_ssa_for_src(&b, nif->condition, 1); > + if (!discard_in_then) > + cond = nir_inot(&b, cond); > + > + nir_intrinsic_instr *discard_if = > + nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if); > + discard_if->src[0] = nir_src_for_ssa(cond); > + nir_builder_instr_insert(&b, &discard_if->instr); > + > + nir_lower_phis_to_regs_block(nir_cf_node_as_block( > + nir_cf_node_next(&nif->cf_node))); > + > + nir_cf_list list; > + if (discard_in_then) > + nir_cf_list_extract(&list, &nif->else_list); > + else > + nir_cf_list_extract(&list, &nif->then_list); > + nir_cf_reinsert(&list, nir_after_instr(&discard_if->instr)); > + > + nir_cf_node_remove(&nif->cf_node); > + > + progress = true; > + } > + > + /* If we modified control-flow, metadata is toast. 
Also, we may have > + * lowered some phis to registers so we need to back into SSA. > + */ > + if (progress) { > + nir_metadata_preserve(impl, 0); > + nir_lower_regs_to_ssa_impl(impl); > + } > + > + return progress; > +} > + > +bool > +nir_opt_discard_if(nir_shader *shader) > +{ > + assert(shader->info.stage == MESA_SHADER_FRAGMENT); > + > + bool progress = false; > + > + nir_foreach_function(function, shader) { > + if (function->impl && > + opt_discard_if_impl(function->impl)) > + progress = true; > + } > + > + return progress; > +} > + > +static bool > +nir_variable_mode_is_read_only(nir_variable_mode mode) > +{ > + return mode == nir_var_shader_in || > + mode == nir_var_uniform || > + mode == nir_var_system_value; > +} > + > +static bool > +nir_op_is_derivative(nir_op op) > +{ > + return op == nir_op_fddx || > + op == nir_op_fddy || > + op == nir_op_fddx_fine || > + op == nir_op_fddy_fine || > + op == nir_op_fddx_coarse || > + op == nir_op_fddy_coarse; > +} > + > +static bool > +nir_texop_implies_derivative(nir_texop op) > +{ > + return op == nir_texop_tex || > + op == nir_texop_txb || > + op == nir_texop_lod; > +} > + > +static bool > +nir_intrinsic_writes_external_memory(nir_intrinsic_op intrin) > +{ > + switch (intrin) { > + case nir_intrinsic_store_deref: > + case nir_intrinsic_copy_deref: > + case nir_intrinsic_deref_atomic_add: > + case nir_intrinsic_deref_atomic_imin: > + case nir_intrinsic_deref_atomic_umin: > + case nir_intrinsic_deref_atomic_imax: > + case nir_intrinsic_deref_atomic_umax: > + case nir_intrinsic_deref_atomic_and: > + case nir_intrinsic_deref_atomic_or: > + case nir_intrinsic_deref_atomic_xor: > + case nir_intrinsic_deref_atomic_exchange: > + case nir_intrinsic_deref_atomic_comp_swap: > + /* If we ever start using variables for SSBO ops, we'll need to do > + * something here. For now, they're safe. 
> + */ > + return false; > + > + case nir_intrinsic_store_ssbo: > + case nir_intrinsic_ssbo_atomic_add: > + case nir_intrinsic_ssbo_atomic_imin: > + case nir_intrinsic_ssbo_atomic_umin: > + case nir_intrinsic_ssbo_atomic_imax: > + case nir_intrinsic_ssbo_atomic_umax: > + case nir_intrinsic_ssbo_atomic_and: > + case nir_intrinsic_ssbo_atomic_or: > + case nir_intrinsic_ssbo_atomic_xor: > + case nir_intrinsic_ssbo_atomic_exchange: > + case nir_intrinsic_ssbo_atomic_comp_swap: > + return true; > + > + case nir_intrinsic_image_deref_store: > + case nir_intrinsic_image_deref_atomic_add: > + case nir_intrinsic_image_deref_atomic_min: > + case nir_intrinsic_image_deref_atomic_max: > + case nir_intrinsic_image_deref_atomic_and: > + case nir_intrinsic_image_deref_atomic_or: > + case nir_intrinsic_image_deref_atomic_xor: > + case nir_intrinsic_image_deref_atomic_exchange: > + case nir_intrinsic_image_deref_atomic_comp_swap: > + return true; > + > + default: > + return false; > + } > +} > + > +static bool > +add_src_instr_to_worklist(nir_src *src, void *wl) > +{ > + if (!src->is_ssa) > + return false; > + > + nir_instr_worklist_push_tail(wl, src->ssa->parent_instr); > + return true; > +} > + > +static bool > +try_move_discard(nir_builder *b, nir_intrinsic_instr *discard) > +{ > + /* We require the discard to be in the top level of control flow. We > + * could, in theory, move discards that are inside ifs or loops but that > + * would be a lot more work. > + */ > + if (discard->instr.block->cf_node.parent->type != nir_cf_node_function) > + return false; > + > + /* Build the set of all instructions discard depends on. While we're at > + * it, we watch out for any cases that would prevent us from moving the > + * instruction. 
> + */ > + struct set *dep_instrs = _mesa_set_create(NULL, _mesa_hash_pointer, > + _mesa_key_pointer_equal); > + > + bool can_move_discard = true; > + nir_instr_worklist *work = nir_instr_worklist_create(); > + add_src_instr_to_worklist(&discard->src[0], work); > + nir_foreach_instr_in_worklist(instr, work) { > + /* Don't process an instruction twice */ > + if (_mesa_set_search(dep_instrs, instr)) > + continue; > + > + /* Phi instructions can't be moved at all. Also, if we're dependent on > + * a phi then we are dependent on some other bit of control flow and > + * it's hard to figure out the proper condition. > + */ > + if (instr->type == nir_instr_type_phi) { > + can_move_discard = false; > + break; > + } > + > + if (instr->type == nir_instr_type_intrinsic) { > + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); > + if (intrin->intrinsic == nir_intrinsic_load_deref) { > + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); > + if (!nir_variable_mode_is_read_only(deref->mode)) { > + can_move_discard = false; > + break; > + } > + } else if (!(nir_intrinsic_infos[intrin->intrinsic].flags & > + NIR_INTRINSIC_CAN_REORDER)) { > + can_move_discard = false; > + break; > + } > + } > + > + if (!nir_foreach_src(instr, add_src_instr_to_worklist, work)) { > + can_move_discard = false; > + break; > + } > + > + _mesa_set_add(dep_instrs, instr); > + } > + > + if (!can_move_discard) { > + nir_instr_worklist_destroy(work); > + _mesa_set_destroy(dep_instrs, NULL); > + return false; > + } > + > + nir_function_impl *impl = > + nir_cf_node_as_function(discard->instr.block->cf_node.parent); > + > + /* Walk the list of instructions and move the discard and everything it > + * depends on to the top. We walk the instruction list here because it > + * ensures that everything stays in its original order. 
This provides > + * stability for the algorithm and ensures that we don't accidentalyly accidentally Gražvydas _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev