On 16/4/19 1:14 pm, Marek Olšák wrote:


On Mon, Apr 15, 2019, 10:41 PM Timothy Arceri <tarc...@itsqueeze.com <mailto:tarc...@itsqueeze.com>> wrote:



    On 16/4/19 7:03 am, Marek Olšák wrote:
     > ping
     >
     > On Tue, Apr 9, 2019 at 10:03 PM Marek Olšák <mar...@gmail.com
    <mailto:mar...@gmail.com>
     > <mailto:mar...@gmail.com <mailto:mar...@gmail.com>>> wrote:
     >
     >     From: Marek Olšák <marek.ol...@amd.com
    <mailto:marek.ol...@amd.com> <mailto:marek.ol...@amd.com
    <mailto:marek.ol...@amd.com>>>
     >
     >     ---
     >       src/compiler/nir/nir.h                    |  8 +++++
     >       src/compiler/nir/nir_opt_intrinsics.c     | 40
    +++++++++++++++++++++--
     >       src/gallium/drivers/radeonsi/si_get.c     |  1 +
     >       src/mesa/state_tracker/st_glsl_to_nir.cpp |  1 +
     >       4 files changed, 48 insertions(+), 2 deletions(-)
     >
     >     diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
     >     index 09950bf3398..d3874705235 100644
     >     --- a/src/compiler/nir/nir.h
     >     +++ b/src/compiler/nir/nir.h
     >     @@ -2283,20 +2283,28 @@ typedef struct
    nir_shader_compiler_options {
     >           *
     >           * This depends on some possibly hw implementation details,
     >     which may
     >           * not be true for all hw.  In particular that the FS is
    only
     >     executed
     >           * for covered samples or for helper invocations.  So,
    do not
     >     blindly
     >           * enable this option.
     >           *
     >           * Note: See also issue #22 in ARB_shader_image_load_store
     >           */
     >          bool lower_helper_invocation;
     >
     >     +   /**
     >     +    * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
     >     +    *
     >     +    *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
     >     +    *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
     >     +    */
     >     +   bool optimize_sample_mask_in;
     >     +
     >          bool lower_cs_local_index_from_id;
     >          bool lower_cs_local_id_from_index;
     >
     >          bool lower_device_index_to_zero;
     >
     >          /* Set if nir_lower_wpos_ytransform() should also invert
     >     gl_PointCoord. */
     >          bool lower_wpos_pntc;
     >
     >          bool lower_hadd;
     >          bool lower_add_sat;
     >     diff --git a/src/compiler/nir/nir_opt_intrinsics.c
     >     b/src/compiler/nir/nir_opt_intrinsics.c
     >     index 7b054faa204..2f9e4e466c9 100644
     >     --- a/src/compiler/nir/nir_opt_intrinsics.c
     >     +++ b/src/compiler/nir/nir_opt_intrinsics.c
     >     @@ -22,21 +22,22 @@
     >        */
     >
     >       #include "nir.h"
     >       #include "nir_builder.h"
     >
     >       /**
     >        * \file nir_opt_intrinsics.c
     >        */
     >
     >       static bool
     >     -opt_intrinsics_impl(nir_function_impl *impl)
     >     +opt_intrinsics_impl(nir_function_impl *impl,
     >     +                    const struct nir_shader_compiler_options
    *options)
     >       {
     >          nir_builder b;
     >          nir_builder_init(&b, impl);
     >          bool progress = false;
     >
     >          nir_foreach_block(block, impl) {
     >             nir_foreach_instr_safe(instr, block) {
     >                if (instr->type != nir_instr_type_intrinsic)
     >                   continue;
     >
     >     @@ -48,20 +49,55 @@ opt_intrinsics_impl(nir_function_impl *impl)
     >                case nir_intrinsic_vote_any:
     >                case nir_intrinsic_vote_all:
     >                   if (nir_src_is_const(intrin->src[0]))
     >                      replacement = nir_ssa_for_src(&b,
    intrin->src[0], 1);
     >                   break;
     >                case nir_intrinsic_vote_feq:
     >                case nir_intrinsic_vote_ieq:
     >                   if (nir_src_is_const(intrin->src[0]))
     >                      replacement = nir_imm_true(&b);
     >                   break;
     >     +         case nir_intrinsic_load_sample_mask_in:
     >     +            /* Transform:
     >     +             *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
     >     +             *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
     >     +             */
     >     +            if (!options->optimize_sample_mask_in)
     >     +               continue;
     >     +
     >     +            nir_foreach_use_safe(use_src, &intrin->dest.ssa) {
     >     +               if (use_src->parent_instr->type ==
    nir_instr_type_alu) {
     >     +                  nir_alu_instr *alu =
     >     nir_instr_as_alu(use_src->parent_instr);
     >     +
     >     +                  if (alu->op == nir_op_ieq ||
     >     +                      alu->op == nir_op_ine) {
     >     +                     /* Check for 0 in either operand. */
     >     +                     nir_const_value *const_val =
>     +  nir_src_as_const_value(alu->src[0].src);
     >     +                     if (!const_val)
     >     +                        const_val =
     >     nir_src_as_const_value(alu->src[1].src);
     >     +                     if (!const_val || const_val->i32[0] != 0)
     >     +                        continue;
     >     +
     >     +                     nir_ssa_def *new_expr =
     >     nir_load_helper_invocation(&b, 1);
     >     +
     >     +                     if (alu->op == nir_op_ine32)

    How can this be nir_op_ine32 when the outer if condition is:

    alu->op == nir_op_ieq || alu->op == nir_op_ine

    ??


Yeah, that's a leftover from the previous version that used the 32 suffix everywhere and didn't work.

Ok. Well with that fixed the rest of the logic looks correct. I can't say I'm informed enough to know the details of how the opt works so:

Acked-by: Timothy Arceri <tarc...@itsqueeze.com>


Marek


     >     +                        new_expr = nir_inot(&b, new_expr);
     >     +
>     +  nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
     >     +
     >     nir_src_for_ssa(new_expr));
     >     +                     nir_instr_remove(&alu->instr);
     >     +                     continue;
     >     +                  }
     >     +               }
     >     +            }
     >     +            continue;
     >                default:
     >                   break;
     >                }
     >
     >                if (!replacement)
     >                   continue;
     >
     >                nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
>  nir_src_for_ssa(replacement));
     >                nir_instr_remove(instr);
     >     @@ -74,19 +110,19 @@ opt_intrinsics_impl(nir_function_impl *impl)
     >
     >       bool
     >       nir_opt_intrinsics(nir_shader *shader)
     >       {
     >          bool progress = false;
     >
     >          nir_foreach_function(function, shader) {
     >             if (!function->impl)
     >                continue;
     >
     >     -      if (opt_intrinsics_impl(function->impl)) {
     >     +      if (opt_intrinsics_impl(function->impl,
    shader->options)) {
     >                progress = true;
     >                nir_metadata_preserve(function->impl,
     >     nir_metadata_block_index |
     >
     >     nir_metadata_dominance);
     >             }
     >          }
     >
     >          return progress;
     >       }
     >     diff --git a/src/gallium/drivers/radeonsi/si_get.c
     >     b/src/gallium/drivers/radeonsi/si_get.c
     >     index 58b56b34d13..2142d5a33f2 100644
     >     --- a/src/gallium/drivers/radeonsi/si_get.c
     >     +++ b/src/gallium/drivers/radeonsi/si_get.c
     >     @@ -494,20 +494,21 @@ static const struct
     >     nir_shader_compiler_options nir_options = {
     >              .lower_pack_snorm_2x16 = true,
     >              .lower_pack_snorm_4x8 = true,
     >              .lower_pack_unorm_2x16 = true,
     >              .lower_pack_unorm_4x8 = true,
     >              .lower_unpack_snorm_2x16 = true,
     >              .lower_unpack_snorm_4x8 = true,
     >              .lower_unpack_unorm_2x16 = true,
     >              .lower_unpack_unorm_4x8 = true,
     >              .lower_extract_byte = true,
     >              .lower_extract_word = true,
     >     +       .optimize_sample_mask_in = true,
     >              .max_unroll_iterations = 32,
     >              .native_integers = true,
     >       };
     >
     >       static const void *
     >       si_get_compiler_options(struct pipe_screen *screen,
     >                              enum pipe_shader_ir ir,
     >                              enum pipe_shader_type shader)
     >       {
     >              assert(ir == PIPE_SHADER_IR_NIR);
     >     diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp
     >     b/src/mesa/state_tracker/st_glsl_to_nir.cpp
     >     index fb10869c9f9..8028ccf7110 100644
     >     --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
     >     +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
     >     @@ -497,20 +497,21 @@ st_glsl_to_nir_post_opts(struct st_context
     >     *st, struct gl_program *prog,
     >          /* This has to be done last.  Any operation the can cause
     >           * prog->ParameterValues to get reallocated (e.g., anything
     >     that adds a
     >           * program constant) has to happen before creating this
    linkage.
     >           */
     >          _mesa_associate_uniform_storage(st->ctx, shader_program,
    prog,
     >     true);
     >
     >          st_set_prog_affected_state_flags(prog);
     >
     >          NIR_PASS_V(nir, st_nir_lower_builtin);
     >          NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true);
     >     +   NIR_PASS_V(nir, nir_opt_intrinsics);
     >
     >          nir_variable_mode mask = nir_var_function_temp;
     >          nir_remove_dead_variables(nir, mask);
     >
     >          if (st->ctx->_Shader->Flags & GLSL_DUMP) {
     >             _mesa_log("\n");
     >             _mesa_log("NIR IR for linked %s program %d:\n",
     >                    _mesa_shader_stage_to_string(prog->info.stage),
     >                    shader_program->Name);
     >             nir_print_shader(nir, _mesa_get_log_file());
     >     --
     >     2.17.1
     >
     >
     > _______________________________________________
     > mesa-dev mailing list
     > mesa-dev@lists.freedesktop.org
    <mailto:mesa-dev@lists.freedesktop.org>
     > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
     >

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to