Re: [Mesa-dev] [PATCH 2/3] nir: Add an intrinsic for a bitmask of the whole subgroup

2017-10-31 Thread Jason Ekstrand
On Tue, Oct 31, 2017 at 10:55 AM, Neil Roberts  wrote:

> Similar to nir_intrinsic_load_subgroup_eq_mask and friends, this adds
> an intrinsic which contains a bit for every member of the group. This
> doesn’t have a corresponding GLSL builtin but it will be used to
> calculate nir_intrinsic_load_subgroup_g{t,e}_mask. It has its own nir
> option on whether to lower it. The idea is that this should be much
> easier to generate than the other masks because it will likely be a
> compile-time constant and if so it will generate more efficient code
> for the other masks.
>

As I remarked on patch 3, this isn't really going to be needed once my
stuff lands.


> ---
>  src/compiler/nir/nir.h|  1 +
>  src/compiler/nir/nir_intrinsics.h |  1 +
>  src/compiler/nir/nir_opt_intrinsics.c | 41 +++---
> -
>  src/intel/compiler/brw_compiler.c |  1 +
>  src/intel/compiler/brw_fs_nir.cpp |  1 +
>  5 files changed, 36 insertions(+), 9 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index dd833cf..edb02b9 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -1836,6 +1836,7 @@ typedef struct nir_shader_compiler_options {
> bool lower_extract_word;
>
> bool lower_vote_trivial;
> +   bool lower_subgroup_all_mask;
> bool lower_subgroup_masks;
>
> /**
> diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_
> intrinsics.h
> index cefd18b..de362a8 100644
> --- a/src/compiler/nir/nir_intrinsics.h
> +++ b/src/compiler/nir/nir_intrinsics.h
> @@ -350,6 +350,7 @@ SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
> +SYSTEM_VALUE(subgroup_all_mask, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
>  SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
> diff --git a/src/compiler/nir/nir_opt_intrinsics.c
> b/src/compiler/nir/nir_opt_intrinsics.c
> index d5fdc51..71d79d7 100644
> --- a/src/compiler/nir/nir_opt_intrinsics.c
> +++ b/src/compiler/nir/nir_opt_intrinsics.c
> @@ -29,23 +29,39 @@
>   */
>
>  static nir_ssa_def *
> -high_subgroup_mask(nir_builder *b,
> -   nir_ssa_def *count,
> -   uint64_t base_mask)
> +subgroup_all_mask(nir_builder *b,
> +  nir_ssa_def *count)
>  {
> /* group_mask could probably be calculated more efficiently but we
> want to
>  * be sure not to shift by 64 if the subgroup size is 64 because the
> GLSL
> -* shift operator is undefined in that case. In any case if we were
> worried
> -* about efficency this should probably be done further down because
> the
> -* subgroup size is likely to be known at compile time.
> +* shift operator is undefined in that case. In any case if the driver
> is
> +* worried about efficency this should probably be done further down
> +* because the subgroup size is likely to be known at compile time.
>  */
> nir_ssa_def *subgroup_size = nir_load_subgroup_size(b);
> nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull);
> nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size);
> -   nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift);
> -   nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask),
> count);
> +   return nir_ushr(b, all_bits, shift);
> +}
>
> -   return nir_iand(b, higher_bits, group_mask);
> +static nir_ssa_def *
> +high_subgroup_mask(nir_builder *b,
> +   nir_ssa_def *count,
> +   uint64_t base_mask)
> +{
> +   nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask),
> count);
> +   nir_intrinsic_instr *load_group_mask =
> +  nir_intrinsic_instr_create(b->shader,
> + nir_intrinsic_load_subgroup_all_mask);
> +   load_group_mask->num_components = 1;
> +   nir_ssa_dest_init(_group_mask->instr,
> + _group_mask->dest,
> + 1 /* num_components */,
> + 64 /* bit_size */,
> + NULL /* name */);
> +   nir_builder_instr_insert(b, _group_mask->instr);
> +
> +   return nir_iand(b, higher_bits, _group_mask->dest.ssa);
>  }
>
>  static bool
> @@ -100,6 +116,10 @@ opt_intrinsics_impl(nir_function_impl *impl)
>   nir_imm_int(, 0));
>  break;
>   }
> + case nir_intrinsic_load_subgroup_all_mask:
> +if (!b.shader->options->lower_subgroup_all_mask)
> +   break;
> +/* flow through */
>   case nir_intrinsic_load_subgroup_eq_mask:
>   case nir_intrinsic_load_subgroup_ge_mask:
>   case nir_intrinsic_load_subgroup_gt_mask:
> @@ -111,6 +131,9 @@ opt_intrinsics_impl(nir_function_impl *impl)
>  nir_ssa_def *count = 

[Mesa-dev] [PATCH 2/3] nir: Add an intrinsic for a bitmask of the whole subgroup

2017-10-31 Thread Neil Roberts
Similar to nir_intrinsic_load_subgroup_eq_mask and friends, this adds
an intrinsic which contains a bit for every member of the group. This
doesn’t have a corresponding GLSL builtin but it will be used to
calculate nir_intrinsic_load_subgroup_g{t,e}_mask. It has its own nir
option on whether to lower it. The idea is that this should be much
easier to generate than the other masks because it will likely be a
compile-time constant and if so it will generate more efficient code
for the other masks.
---
 src/compiler/nir/nir.h|  1 +
 src/compiler/nir/nir_intrinsics.h |  1 +
 src/compiler/nir/nir_opt_intrinsics.c | 41 +++
 src/intel/compiler/brw_compiler.c |  1 +
 src/intel/compiler/brw_fs_nir.cpp |  1 +
 5 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index dd833cf..edb02b9 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1836,6 +1836,7 @@ typedef struct nir_shader_compiler_options {
bool lower_extract_word;
 
bool lower_vote_trivial;
+   bool lower_subgroup_all_mask;
bool lower_subgroup_masks;
 
/**
diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index cefd18b..de362a8 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -350,6 +350,7 @@ SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_all_mask, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
diff --git a/src/compiler/nir/nir_opt_intrinsics.c 
b/src/compiler/nir/nir_opt_intrinsics.c
index d5fdc51..71d79d7 100644
--- a/src/compiler/nir/nir_opt_intrinsics.c
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -29,23 +29,39 @@
  */
 
 static nir_ssa_def *
-high_subgroup_mask(nir_builder *b,
-   nir_ssa_def *count,
-   uint64_t base_mask)
+subgroup_all_mask(nir_builder *b,
+  nir_ssa_def *count)
 {
/* group_mask could probably be calculated more efficiently but we want to
 * be sure not to shift by 64 if the subgroup size is 64 because the GLSL
-* shift operator is undefined in that case. In any case if we were worried
-* about efficency this should probably be done further down because the
-* subgroup size is likely to be known at compile time.
+* shift operator is undefined in that case. In any case if the driver is
+* worried about efficency this should probably be done further down
+* because the subgroup size is likely to be known at compile time.
 */
nir_ssa_def *subgroup_size = nir_load_subgroup_size(b);
nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull);
nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size);
-   nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift);
-   nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count);
+   return nir_ushr(b, all_bits, shift);
+}
 
-   return nir_iand(b, higher_bits, group_mask);
+static nir_ssa_def *
+high_subgroup_mask(nir_builder *b,
+   nir_ssa_def *count,
+   uint64_t base_mask)
+{
+   nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count);
+   nir_intrinsic_instr *load_group_mask =
+  nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_load_subgroup_all_mask);
+   load_group_mask->num_components = 1;
+   nir_ssa_dest_init(_group_mask->instr,
+ _group_mask->dest,
+ 1 /* num_components */,
+ 64 /* bit_size */,
+ NULL /* name */);
+   nir_builder_instr_insert(b, _group_mask->instr);
+
+   return nir_iand(b, higher_bits, _group_mask->dest.ssa);
 }
 
 static bool
@@ -100,6 +116,10 @@ opt_intrinsics_impl(nir_function_impl *impl)
  nir_imm_int(, 0));
 break;
  }
+ case nir_intrinsic_load_subgroup_all_mask:
+if (!b.shader->options->lower_subgroup_all_mask)
+   break;
+/* flow through */
  case nir_intrinsic_load_subgroup_eq_mask:
  case nir_intrinsic_load_subgroup_ge_mask:
  case nir_intrinsic_load_subgroup_gt_mask:
@@ -111,6 +131,9 @@ opt_intrinsics_impl(nir_function_impl *impl)
 nir_ssa_def *count = nir_load_subgroup_invocation();
 
 switch (intrin->intrinsic) {
+case nir_intrinsic_load_subgroup_all_mask:
+   replacement = subgroup_all_mask(, count);
+   break;
 case nir_intrinsic_load_subgroup_eq_mask:
replacement = nir_ishl(, nir_imm_int64(, 1ull), count);
break;
diff --git