Re: [Mesa-dev] [PATCH] nir: optimize gl_SampleMaskIn to gl_HelperInvocation for radeonsi when possible

2019-04-15 Thread Timothy Arceri



On 16/4/19 1:14 pm, Marek Olšák wrote:



On Mon, Apr 15, 2019, 10:41 PM Timothy Arceri > wrote:




On 16/4/19 7:03 am, Marek Olšák wrote:
 > ping
 >
 > On Tue, Apr 9, 2019 at 10:03 PM Marek Olšák mailto:mar...@gmail.com>
 > >> wrote:
 >
 >     From: Marek Olšák mailto:marek.ol...@amd.com> >>
 >
 >     ---
 >       src/compiler/nir/nir.h                    |  8 +
 >       src/compiler/nir/nir_opt_intrinsics.c     | 40
+--
 >       src/gallium/drivers/radeonsi/si_get.c     |  1 +
 >       src/mesa/state_tracker/st_glsl_to_nir.cpp |  1 +
 >       4 files changed, 48 insertions(+), 2 deletions(-)
 >
 >     diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
 >     index 09950bf3398..d3874705235 100644
 >     --- a/src/compiler/nir/nir.h
 >     +++ b/src/compiler/nir/nir.h
 >     @@ -2283,20 +2283,28 @@ typedef struct
nir_shader_compiler_options {
 >           *
 >           * This depends on some possibly hw implementation details,
 >     which may
 >           * not be true for all hw.  In particular that the FS is
only
 >     executed
 >           * for covered samples or for helper invocations.  So,
do not
 >     blindly
 >           * enable this option.
 >           *
 >           * Note: See also issue #22 in ARB_shader_image_load_store
 >           */
 >          bool lower_helper_invocation;
 >
 >     +   /**
 >     +    * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
 >     +    *
 >     +    *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
 >     +    *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
 >     +    */
 >     +   bool optimize_sample_mask_in;
 >     +
 >          bool lower_cs_local_index_from_id;
 >          bool lower_cs_local_id_from_index;
 >
 >          bool lower_device_index_to_zero;
 >
 >          /* Set if nir_lower_wpos_ytransform() should also invert
 >     gl_PointCoord. */
 >          bool lower_wpos_pntc;
 >
 >          bool lower_hadd;
 >          bool lower_add_sat;
 >     diff --git a/src/compiler/nir/nir_opt_intrinsics.c
 >     b/src/compiler/nir/nir_opt_intrinsics.c
 >     index 7b054faa204..2f9e4e466c9 100644
 >     --- a/src/compiler/nir/nir_opt_intrinsics.c
 >     +++ b/src/compiler/nir/nir_opt_intrinsics.c
 >     @@ -22,21 +22,22 @@
 >        */
 >
 >       #include "nir.h"
 >       #include "nir_builder.h"
 >
 >       /**
 >        * \file nir_opt_intrinsics.c
 >        */
 >
 >       static bool
 >     -opt_intrinsics_impl(nir_function_impl *impl)
 >     +opt_intrinsics_impl(nir_function_impl *impl,
 >     +                    const struct nir_shader_compiler_options
*options)
 >       {
 >          nir_builder b;
 >          nir_builder_init(, impl);
 >          bool progress = false;
 >
 >          nir_foreach_block(block, impl) {
 >             nir_foreach_instr_safe(instr, block) {
 >                if (instr->type != nir_instr_type_intrinsic)
 >                   continue;
 >
 >     @@ -48,20 +49,55 @@ opt_intrinsics_impl(nir_function_impl *impl)
 >                case nir_intrinsic_vote_any:
 >                case nir_intrinsic_vote_all:
 >                   if (nir_src_is_const(intrin->src[0]))
 >                      replacement = nir_ssa_for_src(,
intrin->src[0], 1);
 >                   break;
 >                case nir_intrinsic_vote_feq:
 >                case nir_intrinsic_vote_ieq:
 >                   if (nir_src_is_const(intrin->src[0]))
 >                      replacement = nir_imm_true();
 >                   break;
 >     +         case nir_intrinsic_load_sample_mask_in:
 >     +            /* Transform:
 >     +             *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
 >     +             *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
 >     +             */
 >     +            if (!options->optimize_sample_mask_in)
 >     +               continue;
 >     +
 >     +            nir_foreach_use_safe(use_src, >dest.ssa) {
 >     +               if (use_src->parent_instr->type ==
nir_instr_type_alu) {
 >     +                  nir_alu_instr *alu =
 >     nir_instr_as_alu(use_src->parent_instr);
 >     +
 >     +                  if (alu->op == nir_op_ieq ||
 >     +                      alu->op == nir_op_ine) {
 >     +                     /* Check for 0 in either operand. */
 >     +                     nir_const_value *const_val =
 >     +   
  

Re: [Mesa-dev] [PATCH] nir: optimize gl_SampleMaskIn to gl_HelperInvocation for radeonsi when possible

2019-04-15 Thread Marek Olšák
On Mon, Apr 15, 2019, 10:41 PM Timothy Arceri  wrote:

>
>
> On 16/4/19 7:03 am, Marek Olšák wrote:
> > ping
> >
> > On Tue, Apr 9, 2019 at 10:03 PM Marek Olšák  > > wrote:
> >
> > From: Marek Olšák mailto:marek.ol...@amd.com>>
> >
> > ---
> >   src/compiler/nir/nir.h|  8 +
> >   src/compiler/nir/nir_opt_intrinsics.c | 40
> +--
> >   src/gallium/drivers/radeonsi/si_get.c |  1 +
> >   src/mesa/state_tracker/st_glsl_to_nir.cpp |  1 +
> >   4 files changed, 48 insertions(+), 2 deletions(-)
> >
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index 09950bf3398..d3874705235 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -2283,20 +2283,28 @@ typedef struct nir_shader_compiler_options {
> >   *
> >   * This depends on some possibly hw implementation details,
> > which may
> >   * not be true for all hw.  In particular that the FS is only
> > executed
> >   * for covered samples or for helper invocations.  So, do not
> > blindly
> >   * enable this option.
> >   *
> >   * Note: See also issue #22 in ARB_shader_image_load_store
> >   */
> >  bool lower_helper_invocation;
> >
> > +   /**
> > +* Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
> > +*
> > +*   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
> > +*   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
> > +*/
> > +   bool optimize_sample_mask_in;
> > +
> >  bool lower_cs_local_index_from_id;
> >  bool lower_cs_local_id_from_index;
> >
> >  bool lower_device_index_to_zero;
> >
> >  /* Set if nir_lower_wpos_ytransform() should also invert
> > gl_PointCoord. */
> >  bool lower_wpos_pntc;
> >
> >  bool lower_hadd;
> >  bool lower_add_sat;
> > diff --git a/src/compiler/nir/nir_opt_intrinsics.c
> > b/src/compiler/nir/nir_opt_intrinsics.c
> > index 7b054faa204..2f9e4e466c9 100644
> > --- a/src/compiler/nir/nir_opt_intrinsics.c
> > +++ b/src/compiler/nir/nir_opt_intrinsics.c
> > @@ -22,21 +22,22 @@
> >*/
> >
> >   #include "nir.h"
> >   #include "nir_builder.h"
> >
> >   /**
> >* \file nir_opt_intrinsics.c
> >*/
> >
> >   static bool
> > -opt_intrinsics_impl(nir_function_impl *impl)
> > +opt_intrinsics_impl(nir_function_impl *impl,
> > +const struct nir_shader_compiler_options
> *options)
> >   {
> >  nir_builder b;
> >  nir_builder_init(, impl);
> >  bool progress = false;
> >
> >  nir_foreach_block(block, impl) {
> > nir_foreach_instr_safe(instr, block) {
> >if (instr->type != nir_instr_type_intrinsic)
> >   continue;
> >
> > @@ -48,20 +49,55 @@ opt_intrinsics_impl(nir_function_impl *impl)
> >case nir_intrinsic_vote_any:
> >case nir_intrinsic_vote_all:
> >   if (nir_src_is_const(intrin->src[0]))
> >  replacement = nir_ssa_for_src(, intrin->src[0],
> 1);
> >   break;
> >case nir_intrinsic_vote_feq:
> >case nir_intrinsic_vote_ieq:
> >   if (nir_src_is_const(intrin->src[0]))
> >  replacement = nir_imm_true();
> >   break;
> > + case nir_intrinsic_load_sample_mask_in:
> > +/* Transform:
> > + *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
> > + *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
> > + */
> > +if (!options->optimize_sample_mask_in)
> > +   continue;
> > +
> > +nir_foreach_use_safe(use_src, >dest.ssa) {
> > +   if (use_src->parent_instr->type ==
> nir_instr_type_alu) {
> > +  nir_alu_instr *alu =
> > nir_instr_as_alu(use_src->parent_instr);
> > +
> > +  if (alu->op == nir_op_ieq ||
> > +  alu->op == nir_op_ine) {
> > + /* Check for 0 in either operand. */
> > + nir_const_value *const_val =
> > + nir_src_as_const_value(alu->src[0].src);
> > + if (!const_val)
> > +const_val =
> > nir_src_as_const_value(alu->src[1].src);
> > + if (!const_val || const_val->i32[0] != 0)
> > +continue;
> > +
> > + nir_ssa_def *new_expr =
> > nir_load_helper_invocation(, 1);
> > +
> > + if (alu->op == nir_op_ine32)
>
> How can this be nir_op_ine32 when the outer if condition is:
>
> alu->op == nir_op_ieq 

Re: [Mesa-dev] [PATCH] nir: optimize gl_SampleMaskIn to gl_HelperInvocation for radeonsi when possible

2019-04-15 Thread Timothy Arceri



On 16/4/19 7:03 am, Marek Olšák wrote:

ping

On Tue, Apr 9, 2019 at 10:03 PM Marek Olšák > wrote:


From: Marek Olšák mailto:marek.ol...@amd.com>>

---
  src/compiler/nir/nir.h                    |  8 +
  src/compiler/nir/nir_opt_intrinsics.c     | 40 +--
  src/gallium/drivers/radeonsi/si_get.c     |  1 +
  src/mesa/state_tracker/st_glsl_to_nir.cpp |  1 +
  4 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 09950bf3398..d3874705235 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2283,20 +2283,28 @@ typedef struct nir_shader_compiler_options {
      *
      * This depends on some possibly hw implementation details,
which may
      * not be true for all hw.  In particular that the FS is only
executed
      * for covered samples or for helper invocations.  So, do not
blindly
      * enable this option.
      *
      * Note: See also issue #22 in ARB_shader_image_load_store
      */
     bool lower_helper_invocation;

+   /**
+    * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
+    *
+    *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
+    *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
+    */
+   bool optimize_sample_mask_in;
+
     bool lower_cs_local_index_from_id;
     bool lower_cs_local_id_from_index;

     bool lower_device_index_to_zero;

     /* Set if nir_lower_wpos_ytransform() should also invert
gl_PointCoord. */
     bool lower_wpos_pntc;

     bool lower_hadd;
     bool lower_add_sat;
diff --git a/src/compiler/nir/nir_opt_intrinsics.c
b/src/compiler/nir/nir_opt_intrinsics.c
index 7b054faa204..2f9e4e466c9 100644
--- a/src/compiler/nir/nir_opt_intrinsics.c
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -22,21 +22,22 @@
   */

  #include "nir.h"
  #include "nir_builder.h"

  /**
   * \file nir_opt_intrinsics.c
   */

  static bool
-opt_intrinsics_impl(nir_function_impl *impl)
+opt_intrinsics_impl(nir_function_impl *impl,
+                    const struct nir_shader_compiler_options *options)
  {
     nir_builder b;
     nir_builder_init(, impl);
     bool progress = false;

     nir_foreach_block(block, impl) {
        nir_foreach_instr_safe(instr, block) {
           if (instr->type != nir_instr_type_intrinsic)
              continue;

@@ -48,20 +49,55 @@ opt_intrinsics_impl(nir_function_impl *impl)
           case nir_intrinsic_vote_any:
           case nir_intrinsic_vote_all:
              if (nir_src_is_const(intrin->src[0]))
                 replacement = nir_ssa_for_src(, intrin->src[0], 1);
              break;
           case nir_intrinsic_vote_feq:
           case nir_intrinsic_vote_ieq:
              if (nir_src_is_const(intrin->src[0]))
                 replacement = nir_imm_true();
              break;
+         case nir_intrinsic_load_sample_mask_in:
+            /* Transform:
+             *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
+             *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
+             */
+            if (!options->optimize_sample_mask_in)
+               continue;
+
+            nir_foreach_use_safe(use_src, >dest.ssa) {
+               if (use_src->parent_instr->type == nir_instr_type_alu) {
+                  nir_alu_instr *alu =
nir_instr_as_alu(use_src->parent_instr);
+
+                  if (alu->op == nir_op_ieq ||
+                      alu->op == nir_op_ine) {
+                     /* Check for 0 in either operand. */
+                     nir_const_value *const_val =
+                         nir_src_as_const_value(alu->src[0].src);
+                     if (!const_val)
+                        const_val =
nir_src_as_const_value(alu->src[1].src);
+                     if (!const_val || const_val->i32[0] != 0)
+                        continue;
+
+                     nir_ssa_def *new_expr =
nir_load_helper_invocation(, 1);
+
+                     if (alu->op == nir_op_ine32)


How can this be nir_op_ine32 when the outer if condition is:

alu->op == nir_op_ieq || alu->op == nir_op_ine

??


+                        new_expr = nir_inot(, new_expr);
+
+                     nir_ssa_def_rewrite_uses(>dest.dest.ssa,
+ 
nir_src_for_ssa(new_expr));

+                     nir_instr_remove(>instr);
+                     continue;
+                  }
+               }
+            }
+            continue;
           default:
              break;
           }

           if (!replacement)
  

Re: [Mesa-dev] [PATCH] nir: optimize gl_SampleMaskIn to gl_HelperInvocation for radeonsi when possible

2019-04-15 Thread Marek Olšák
ping

On Tue, Apr 9, 2019 at 10:03 PM Marek Olšák  wrote:

> From: Marek Olšák 
>
> ---
>  src/compiler/nir/nir.h|  8 +
>  src/compiler/nir/nir_opt_intrinsics.c | 40 +--
>  src/gallium/drivers/radeonsi/si_get.c |  1 +
>  src/mesa/state_tracker/st_glsl_to_nir.cpp |  1 +
>  4 files changed, 48 insertions(+), 2 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 09950bf3398..d3874705235 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2283,20 +2283,28 @@ typedef struct nir_shader_compiler_options {
>  *
>  * This depends on some possibly hw implementation details, which may
>  * not be true for all hw.  In particular that the FS is only executed
>  * for covered samples or for helper invocations.  So, do not blindly
>  * enable this option.
>  *
>  * Note: See also issue #22 in ARB_shader_image_load_store
>  */
> bool lower_helper_invocation;
>
> +   /**
> +* Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
> +*
> +*   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
> +*   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
> +*/
> +   bool optimize_sample_mask_in;
> +
> bool lower_cs_local_index_from_id;
> bool lower_cs_local_id_from_index;
>
> bool lower_device_index_to_zero;
>
> /* Set if nir_lower_wpos_ytransform() should also invert
> gl_PointCoord. */
> bool lower_wpos_pntc;
>
> bool lower_hadd;
> bool lower_add_sat;
> diff --git a/src/compiler/nir/nir_opt_intrinsics.c
> b/src/compiler/nir/nir_opt_intrinsics.c
> index 7b054faa204..2f9e4e466c9 100644
> --- a/src/compiler/nir/nir_opt_intrinsics.c
> +++ b/src/compiler/nir/nir_opt_intrinsics.c
> @@ -22,21 +22,22 @@
>   */
>
>  #include "nir.h"
>  #include "nir_builder.h"
>
>  /**
>   * \file nir_opt_intrinsics.c
>   */
>
>  static bool
> -opt_intrinsics_impl(nir_function_impl *impl)
> +opt_intrinsics_impl(nir_function_impl *impl,
> +const struct nir_shader_compiler_options *options)
>  {
> nir_builder b;
> nir_builder_init(, impl);
> bool progress = false;
>
> nir_foreach_block(block, impl) {
>nir_foreach_instr_safe(instr, block) {
>   if (instr->type != nir_instr_type_intrinsic)
>  continue;
>
> @@ -48,20 +49,55 @@ opt_intrinsics_impl(nir_function_impl *impl)
>   case nir_intrinsic_vote_any:
>   case nir_intrinsic_vote_all:
>  if (nir_src_is_const(intrin->src[0]))
> replacement = nir_ssa_for_src(, intrin->src[0], 1);
>  break;
>   case nir_intrinsic_vote_feq:
>   case nir_intrinsic_vote_ieq:
>  if (nir_src_is_const(intrin->src[0]))
> replacement = nir_imm_true();
>  break;
> + case nir_intrinsic_load_sample_mask_in:
> +/* Transform:
> + *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
> + *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
> + */
> +if (!options->optimize_sample_mask_in)
> +   continue;
> +
> +nir_foreach_use_safe(use_src, >dest.ssa) {
> +   if (use_src->parent_instr->type == nir_instr_type_alu) {
> +  nir_alu_instr *alu =
> nir_instr_as_alu(use_src->parent_instr);
> +
> +  if (alu->op == nir_op_ieq ||
> +  alu->op == nir_op_ine) {
> + /* Check for 0 in either operand. */
> + nir_const_value *const_val =
> + nir_src_as_const_value(alu->src[0].src);
> + if (!const_val)
> +const_val =
> nir_src_as_const_value(alu->src[1].src);
> + if (!const_val || const_val->i32[0] != 0)
> +continue;
> +
> + nir_ssa_def *new_expr =
> nir_load_helper_invocation(, 1);
> +
> + if (alu->op == nir_op_ine32)
> +new_expr = nir_inot(, new_expr);
> +
> + nir_ssa_def_rewrite_uses(>dest.dest.ssa,
> +  nir_src_for_ssa(new_expr));
> + nir_instr_remove(>instr);
> + continue;
> +  }
> +   }
> +}
> +continue;
>   default:
>  break;
>   }
>
>   if (!replacement)
>  continue;
>
>   nir_ssa_def_rewrite_uses(>dest.ssa,
>nir_src_for_ssa(replacement));
>   nir_instr_remove(instr);
> @@ -74,19 +110,19 @@ opt_intrinsics_impl(nir_function_impl *impl)
>
>  bool
>  nir_opt_intrinsics(nir_shader *shader)
>  {
> bool progress = false;
>
> nir_foreach_function(function, shader) {
>if (!function->impl)
>   continue;
>
> -  if (opt_intrinsics_impl(function->impl)) {
> +