Re: [Mesa-dev] [PATCH 8/9 v2] nir: Add partial redundancy elimination for compares

andrey simiklit Mon, 03 Sep 2018 03:17:45 -0700

Hi,

I guess it is just a small mistake in description that:
'z' was before:
   z = y - x;
and became after:
   z = x - y;


I think you inadvertently misspelled in the description and you mean:

>
> This pass attempts to dectect code sequences like
>
>     if (x < y) {
>         z = y - x;
>         ...
>     }
>
> and replace them with sequences like
>
>     t = *y - x*;
>     if (t < 0) {
>         z = t;
>         ...
>     }
>

Because the subtraction is not commutative operation)
Please correct me if I am incorrect.

Regards,
Andrii.

On Thu, Aug 30, 2018 at 8:36 AM Ian Romanick <i...@freedesktop.org> wrote:

> From: Ian Romanick <ian.d.roman...@intel.com>
>
> This pass attempts to dectect code sequences like
>
>     if (x < y) {
>         z = y - x;
>         ...
>     }
>
> and replace them with sequences like
>
>     t = x - y;
>     if (t < 0) {
>         z = t;
>         ...
>     }
>
> On architectures where the subtract can generate the flags used by the
> if-statement, this saves an instruction.  It's also possible that moving
> an instruction out of the if-statement will allow
> nir_opt_peephole_select to convert the whole thing to a bcsel.
>
> Currently only floating point compares and adds are supported.  Adding
> support for integer will be a challenge due to integer overflow.  There
> are a couple possible solutions, but they may not apply to all
> architectures.
>
> v2: Fix a typo in the commit message and a couple typos in comments.
> Fix possible NULL pointer deref from result of push_block().  Add
> missing (-A + B) case.  Suggested by Caio.
>
> Signed-off-by: Ian Romanick <ian.d.roman...@intel.com>
> ---
>  src/compiler/Makefile.sources             |   1 +
>  src/compiler/nir/meson.build              |   1 +
>  src/compiler/nir/nir.h                    |   2 +
>  src/compiler/nir/nir_opt_comparison_pre.c | 360
> ++++++++++++++++++++++++++++++
>  src/compiler/nir/nir_search_helpers.h     |  29 +++
>  5 files changed, 393 insertions(+)
>  create mode 100644 src/compiler/nir/nir_opt_comparison_pre.c
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index d3b06564832..9fe8d5b8904 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -267,6 +267,7 @@ NIR_FILES = \
>         nir/nir_move_load_const.c \
>         nir/nir_move_vec_src_uses_to_dest.c \
>         nir/nir_normalize_cubemap_coords.c \
> +       nir/nir_opt_comparison_pre.c \
>         nir/nir_opt_conditional_discard.c \
>         nir/nir_opt_constant_folding.c \
>         nir/nir_opt_copy_prop_vars.c \
> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> index 5438c17a8f8..2bcc854829e 100644
> --- a/src/compiler/nir/meson.build
> +++ b/src/compiler/nir/meson.build
> @@ -151,6 +151,7 @@ files_libnir = files(
>    'nir_move_load_const.c',
>    'nir_move_vec_src_uses_to_dest.c',
>    'nir_normalize_cubemap_coords.c',
> +  'nir_opt_comparison_pre.c',
>    'nir_opt_conditional_discard.c',
>    'nir_opt_constant_folding.c',
>    'nir_opt_copy_prop_vars.c',
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index ddbcb3c647e..c78387d0acf 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -3000,6 +3000,8 @@ bool nir_convert_from_ssa(nir_shader *shader, bool
> phi_webs_only);
>  bool nir_lower_phis_to_regs_block(nir_block *block);
>  bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
>
> +bool nir_opt_comparison_pre(nir_shader *shader);
> +
>  bool nir_opt_algebraic(nir_shader *shader);
>  bool nir_opt_algebraic_before_ffma(nir_shader *shader);
>  bool nir_opt_algebraic_late(nir_shader *shader);
> diff --git a/src/compiler/nir/nir_opt_comparison_pre.c
> b/src/compiler/nir/nir_opt_comparison_pre.c
> new file mode 100644
> index 00000000000..b2827c21816
> --- /dev/null
> +++ b/src/compiler/nir/nir_opt_comparison_pre.c
> @@ -0,0 +1,360 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without
> limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> next
> + * paragraph) shall be included in all copies or substantial portions of
> the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir_instr_set.h"
> +#include "nir_search_helpers.h"
> +#include "nir_builder.h"
> +#include "util/u_vector.h"
> +
> +/* Partial redundancy elimination of compares
> + *
> + * Seaches for comparisons of the form 'a cmp b' that dominate arithmetic
> + * instructions like 'b - a'.  The comparison is replaced by the
> arithmetic
> + * instruction, and the result is compared with zero.  For example,
> + *
> + *       vec1 32 ssa_111 = flt 0.37, ssa_110.w
> + *       if ssa_111 {
> + *               block block_1:
> + *              vec1 32 ssa_112 = fadd ssa_110.w, -0.37
> + *              ...
> + *
> + * becomes
> + *
> + *       vec1 32 ssa_111 = fadd ssa_110.w, -0.37
> + *       vec1 32 ssa_112 = flt 0.0, ssa_111
> + *       if ssa_112 {
> + *               block block_1:
> + *              ...
> + */
> +
> +struct block_queue {
> +   struct exec_list blocks;
> +   struct exec_list reusable_blocks;
> +};
> +
> +struct block_instructions {
> +   struct exec_node node;
> +   struct u_vector instructions;
> +};
> +
> +static void
> +block_queue_init(struct block_queue *bq)
> +{
> +   exec_list_make_empty(&bq->blocks);
> +   exec_list_make_empty(&bq->reusable_blocks);
> +}
> +
> +static void
> +block_queue_finish(struct block_queue *bq)
> +{
> +   struct block_instructions *n;
> +
> +   while ((n = (struct block_instructions *)
> exec_list_pop_head(&bq->blocks)) != NULL) {
> +      u_vector_finish(&n->instructions);
> +      free(n);
> +   }
> +
> +   while ((n = (struct block_instructions *)
> exec_list_pop_head(&bq->reusable_blocks)) != NULL) {
> +      free(n);
> +   }
> +}
> +
> +static struct block_instructions *
> +push_block(struct block_queue *bq)
> +{
> +   struct block_instructions *bi =
> +      (struct block_instructions *)
> exec_list_pop_head(&bq->reusable_blocks);
> +
> +   if (bi == NULL) {
> +      bi = calloc(1, sizeof(struct block_instructions));
> +
> +      if (bi == NULL)
> +         return NULL;
> +   }
> +
> +   if (!u_vector_init(&bi->instructions,
> +                      sizeof(struct nir_alu_instr *),
> +                      8 * sizeof(struct nir_alu_instr *)))
> +      return NULL;
> +
> +   exec_list_push_tail(&bq->blocks, &bi->node);
> +
> +   return bi;
> +}
> +
> +static void
> +pop_block(struct block_queue *bq, struct block_instructions *bi)
> +{
> +   u_vector_finish(&bi->instructions);
> +   exec_node_remove(&bi->node);
> +   exec_list_push_head(&bq->reusable_blocks, &bi->node);
> +}
> +
> +static void
> +add_instruction_for_block(struct block_instructions *bi,
> +                          struct nir_alu_instr *alu)
> +{
> +   struct nir_alu_instr **data =
> +      u_vector_add(&bi->instructions);
> +
> +   *data = alu;
> +}
> +
> +static void
> +rewrite_compare_instruction(nir_builder *bld, nir_alu_instr *orig_cmp,
> +                            nir_alu_instr *orig_add, bool zero_on_left)
> +{
> +   void *const mem_ctx = ralloc_parent(orig_cmp);
> +
> +   bld->cursor = nir_before_instr(&orig_cmp->instr);
> +
> +   /* This is somewhat tricky.  The compare instruction may be something
> like
> +    * (fcmp, a, b) while the add instruction is something like (fadd,
> fneg(a),
> +    * b).  This is problematic because the SSA value for the fneg(a) may
> not
> +    * exist yet at the compare instruction.
> +    *
> +    * We fabricate the operands of the new add.  This is done using
> +    * information provided by zero_on_left.  If zero_on_left is true, we
> know
> +    * the resulting compare instruction is (fcmp, 0.0, (fadd, x, y)).  If
> the
> +    * original compare instruction was (fcmp, a, b), x = b and y = -a.  If
> +    * zero_on_left is false, the resulting compare instruction is (fcmp,
> +    * (fadd, x, y), 0.0) and x = a and y = -b.
> +    */
> +   nir_ssa_def *const a = nir_ssa_for_alu_src(bld, orig_cmp, 0);
> +   nir_ssa_def *const b = nir_ssa_for_alu_src(bld, orig_cmp, 1);
> +
> +   nir_ssa_def *const fadd = zero_on_left
> +      ? nir_fadd(bld, b, nir_fneg(bld, a))
> +      : nir_fadd(bld, a, nir_fneg(bld, b));
> +
> +   nir_ssa_def *const zero =
> +      nir_imm_floatN_t(bld, 0.0, orig_add->dest.dest.ssa.bit_size);
> +
> +   nir_ssa_def *const cmp = zero_on_left
> +      ? nir_build_alu(bld, orig_cmp->op, zero, fadd, NULL, NULL)
> +      : nir_build_alu(bld, orig_cmp->op, fadd, zero, NULL, NULL);
> +
> +   /* Generating extra moves of the results is the easy way to make sure
> the
> +    * writemasks match the original instructions.  Later optimization
> passes
> +    * will clean these up.  This is similar to nir_replace_instr (in
> +    * nir_search.c).
> +    */
> +   nir_alu_instr *mov_add = nir_alu_instr_create(mem_ctx, nir_op_imov);
> +   mov_add->dest.write_mask = orig_add->dest.write_mask;
> +   nir_ssa_dest_init(&mov_add->instr, &mov_add->dest.dest,
> +                     orig_add->dest.dest.ssa.num_components,
> +                     orig_add->dest.dest.ssa.bit_size, NULL);
> +   mov_add->src[0].src = nir_src_for_ssa(fadd);
> +
> +   nir_builder_instr_insert(bld, &mov_add->instr);
> +
> +   nir_alu_instr *mov_cmp = nir_alu_instr_create(mem_ctx, nir_op_imov);
> +   mov_cmp->dest.write_mask = orig_cmp->dest.write_mask;
> +   nir_ssa_dest_init(&mov_cmp->instr, &mov_cmp->dest.dest,
> +                     orig_cmp->dest.dest.ssa.num_components,
> +                     orig_cmp->dest.dest.ssa.bit_size, NULL);
> +   mov_cmp->src[0].src = nir_src_for_ssa(cmp);
> +
> +   nir_builder_instr_insert(bld, &mov_cmp->instr);
> +
> +   nir_ssa_def_rewrite_uses(&orig_cmp->dest.dest.ssa,
> +                            nir_src_for_ssa(&mov_cmp->dest.dest.ssa));
> +   nir_ssa_def_rewrite_uses(&orig_add->dest.dest.ssa,
> +                            nir_src_for_ssa(&mov_add->dest.dest.ssa));
> +
> +   /* We know these have no more uses because we just rewrote them all,
> so we
> +    * can remove them.
> +    */
> +   nir_instr_remove(&orig_cmp->instr);
> +   nir_instr_remove(&orig_add->instr);
> +}
> +
> +static bool
> +comparison_pre_block(nir_block *block, struct block_queue *bq,
> nir_builder *bld)
> +{
> +   bool progress = false;
> +
> +   struct block_instructions *bi = push_block(bq);
> +   if (bi == NULL)
> +      return false;
> +
> +   nir_foreach_instr_safe(instr, block) {
> +      if (instr->type != nir_instr_type_alu)
> +         continue;
> +
> +      struct nir_alu_instr *const alu = nir_instr_as_alu(instr);
> +
> +      if (alu->dest.dest.ssa.num_components != 1)
> +         continue;
> +
> +      if (alu->dest.saturate)
> +         continue;
> +
> +      static const uint8_t swizzle[4] = { 0, 0, 0, 0 };
> +
> +      switch (alu->op) {
> +      case nir_op_fadd: {
> +         /* If the instruction is fadd, check it against comparison
> +          * instructions that dominate it.
> +          */
> +         struct block_instructions *b =
> +            (struct block_instructions *)
> exec_list_get_head_raw(&bq->blocks);
> +
> +         while (b->node.next != NULL) {
> +            nir_alu_instr **a;
> +            bool rewrote_compare = false;
> +
> +            u_vector_foreach(a, &b->instructions) {
> +               nir_alu_instr *const cmp = *a;
> +
> +               if (cmp == NULL)
> +                  continue;
> +
> +               /* The operands of both instructions are, with some
> liberty,
> +                * commutative.  Check all three permutations.  The third
> +                * permutation is a negation of the original operation, so
> it
> +                * is handled separately.
> +                */
> +               if ((nir_alu_srcs_equal(cmp, alu, 0, 0) &&
> +                    nir_alu_srcs_negative_equal(cmp, alu, 1, 1)) ||
> +                   (nir_alu_srcs_equal(cmp, alu, 0, 1) &&
> +                    nir_alu_srcs_negative_equal(cmp, alu, 1, 0))) {
> +                  /* These are the cases where (A cmp B) matches either
> (A +
> +                   * -B) or (-B + A)
> +                   *
> +                   *    A cmp B <=> A + -B cmp 0
> +                   */
> +                  rewrite_compare_instruction(bld, cmp, alu, false);
> +
> +                  *a = NULL;
> +                  rewrote_compare = true;
> +                  break;
> +               } else if ((nir_alu_srcs_equal(cmp, alu, 1, 0) &&
> +                           nir_alu_srcs_negative_equal(cmp, alu, 0, 1)) ||
> +                          (nir_alu_srcs_equal(cmp, alu, 1, 1) &&
> +                           nir_alu_srcs_negative_equal(cmp, alu, 0, 0))) {
> +                  /* This is the case where (A cmp B) matches (B + -A) or
> (-A
> +                   * + B).
> +                   *
> +                   *    A cmp B <=> 0 cmp B + -A
> +                   */
> +                  rewrite_compare_instruction(bld, cmp, alu, true);
> +
> +                  *a = NULL;
> +                  rewrote_compare = true;
> +                  break;
> +               }
> +            }
> +
> +            /* Bail after a compare in the most dominating block is found.
> +             * This is necessary because 'alu' has been removed from the
> +             * instruction stream.  Should there be a matching compare in
> +             * another block, calling rewrite_compare_instruction again
> will
> +             * try to operate on a node that is not in the list as if it
> were
> +             * in the list.
> +             *
> +             * FINISHME: There may be opportunity for additional
> optimization
> +             * here.  I discovered this problem due to a shader in
> Guacamelee.
> +             * It may be possible to rewrite the matching compares that
> are
> +             * encountered later to reuse the result from the compare
> that was
> +             * first rewritten.  It's also possible that this is just
> taken
> +             * care of by calling the optimization pass repeatedly.
> +             */
> +            if (rewrote_compare) {
> +               progress = true;
> +               break;
> +            }
> +
> +            b = (struct block_instructions *) b->node.next;
> +         }
> +
> +         break;
> +      }
> +
> +      case nir_op_flt:
> +      case nir_op_fge:
> +      case nir_op_fne:
> +      case nir_op_feq:
> +         /* If the instruction is a comparison that is used by an
> if-statement
> +          * or a bcsel and neither operand is immediate value 0, add it
> to the
> +          * set.
> +          */
> +         if (is_used_by_if(alu) &&
> +             is_not_const_zero(alu, 0, 1, swizzle) &&
> +             is_not_const_zero(alu, 1, 1, swizzle))
> +            add_instruction_for_block(bi, alu);
> +
> +         break;
> +
> +      default:
> +         break;
> +      }
> +   }
> +
> +   for (unsigned i = 0; i < block->num_dom_children; i++) {
> +      nir_block *child = block->dom_children[i];
> +
> +      if (comparison_pre_block(child, bq, bld))
> +         progress = true;
> +   }
> +
> +   pop_block(bq, bi);
> +
> +   return progress;
> +}
> +
> +static bool
> +nir_opt_comparison_pre_impl(nir_function_impl *impl)
> +{
> +   struct block_queue bq;
> +   nir_builder bld;
> +
> +   block_queue_init(&bq);
> +   nir_builder_init(&bld, impl);
> +
> +   nir_metadata_require(impl, nir_metadata_dominance);
> +
> +   const bool progress =
> +      comparison_pre_block(nir_start_block(impl), &bq, &bld);
> +
> +   block_queue_finish(&bq);
> +
> +   if (progress)
> +      nir_metadata_preserve(impl, nir_metadata_block_index |
> +                                  nir_metadata_dominance);
> +
> +   return progress;
> +}
> +
> +bool
> +nir_opt_comparison_pre(nir_shader *shader)
> +{
> +   bool progress = false;
> +
> +   nir_foreach_function(function, shader) {
> +      if (function->impl)
> +         progress |= nir_opt_comparison_pre_impl(function->impl);
> +   }
> +
> +   return progress;
> +}
> diff --git a/src/compiler/nir/nir_search_helpers.h
> b/src/compiler/nir/nir_search_helpers.h
> index 8bc6d723c34..85af2f12f9e 100644
> --- a/src/compiler/nir/nir_search_helpers.h
> +++ b/src/compiler/nir/nir_search_helpers.h
> @@ -109,6 +109,29 @@ is_zero_to_one(nir_alu_instr *instr, unsigned src,
> unsigned num_components,
>     return true;
>  }
>
> +static inline bool
> +is_not_const_zero(nir_alu_instr *instr, unsigned src, unsigned
> num_components,
> +                  const uint8_t *swizzle)
> +{
> +   nir_const_value *val = nir_src_as_const_value(instr->src[src].src);
> +
> +   if (!val)
> +      return true;
> +
> +   for (unsigned i = 0; i < num_components; i++) {
> +      switch (nir_op_infos[instr->op].input_types[src]) {
> +      case nir_type_float:
> +         if (val->f32[swizzle[i]] == 0.0f)
> +            return false;
> +         break;
> +      default:
> +         return false;
> +      }
> +   }
> +
> +   return true;
> +}
> +
>  static inline bool
>  is_not_const(nir_alu_instr *instr, unsigned src, UNUSED unsigned
> num_components,
>               UNUSED const uint8_t *swizzle)
> @@ -159,6 +182,12 @@ is_used_once(nir_alu_instr *instr)
>     return true;
>  }
>
> +static inline bool
> +is_used_by_if(nir_alu_instr *instr)
> +{
> +   return !list_empty(&instr->dest.dest.ssa.if_uses);
> +}
> +
>  static inline bool
>  is_not_used_by_if(nir_alu_instr *instr)
>  {
> --
> 2.14.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 8/9 v2] nir: Add partial redundancy elimination for compares

Reply via email to