Re: [Mesa-dev] [PATCH 2/3] nir: Add an intrinsic for a bitmask of the whole subgroup
On Tue, Oct 31, 2017 at 10:55 AM, Neil Robertswrote: > Similar to nir_intrinsic_load_subgroup_eq_mask and friends, this adds > an intrinsic which contains a bit for every member of the group. This > doesn’t have a corresponding GLSL builtin but it will be used to > calculate nir_intrinsic_load_subgroup_g{t,e}_mask. It has its own nir > option on whether to lower it. The idea is that this should be much > easier to generate than the other masks because it will likely be a > compile-time constant and if so it will generate more efficient code > for the other masks. > As I remarked on patch 3, this isn't really going to be needed once my stuff lands. > --- > src/compiler/nir/nir.h| 1 + > src/compiler/nir/nir_intrinsics.h | 1 + > src/compiler/nir/nir_opt_intrinsics.c | 41 +++--- > - > src/intel/compiler/brw_compiler.c | 1 + > src/intel/compiler/brw_fs_nir.cpp | 1 + > 5 files changed, 36 insertions(+), 9 deletions(-) > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > index dd833cf..edb02b9 100644 > --- a/src/compiler/nir/nir.h > +++ b/src/compiler/nir/nir.h > @@ -1836,6 +1836,7 @@ typedef struct nir_shader_compiler_options { > bool lower_extract_word; > > bool lower_vote_trivial; > + bool lower_subgroup_all_mask; > bool lower_subgroup_masks; > > /** > diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_ > intrinsics.h > index cefd18b..de362a8 100644 > --- a/src/compiler/nir/nir_intrinsics.h > +++ b/src/compiler/nir/nir_intrinsics.h > @@ -350,6 +350,7 @@ SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx) > SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx) > SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx) > SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx) > +SYSTEM_VALUE(subgroup_all_mask, 1, 0, xx, xx, xx) > SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx) > SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx) > SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx) > diff --git a/src/compiler/nir/nir_opt_intrinsics.c > b/src/compiler/nir/nir_opt_intrinsics.c > index d5fdc51..71d79d7 100644 > --- a/src/compiler/nir/nir_opt_intrinsics.c > +++ b/src/compiler/nir/nir_opt_intrinsics.c > @@ -29,23 +29,39 @@ > */ > > static nir_ssa_def * > -high_subgroup_mask(nir_builder *b, > - nir_ssa_def *count, > - uint64_t base_mask) > +subgroup_all_mask(nir_builder *b, > + nir_ssa_def *count) > { > /* group_mask could probably be calculated more efficiently but we > want to > * be sure not to shift by 64 if the subgroup size is 64 because the > GLSL > -* shift operator is undefined in that case. In any case if we were > worried > -* about efficency this should probably be done further down because > the > -* subgroup size is likely to be known at compile time. > +* shift operator is undefined in that case. In any case if the driver > is > +* worried about efficency this should probably be done further down > +* because the subgroup size is likely to be known at compile time. > */ > nir_ssa_def *subgroup_size = nir_load_subgroup_size(b); > nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull); > nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size); > - nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift); > - nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), > count); > + return nir_ushr(b, all_bits, shift); > +} > > - return nir_iand(b, higher_bits, group_mask); > +static nir_ssa_def * > +high_subgroup_mask(nir_builder *b, > + nir_ssa_def *count, > + uint64_t base_mask) > +{ > + nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), > count); > + nir_intrinsic_instr *load_group_mask = > + nir_intrinsic_instr_create(b->shader, > + nir_intrinsic_load_subgroup_all_mask); > + load_group_mask->num_components = 1; > + nir_ssa_dest_init(_group_mask->instr, > + _group_mask->dest, > + 1 /* num_components */, > + 64 /* bit_size */, > + NULL /* name */); > + nir_builder_instr_insert(b, _group_mask->instr); > + > + return nir_iand(b, higher_bits, _group_mask->dest.ssa); > } > > static bool > @@ -100,6 +116,10 @@ opt_intrinsics_impl(nir_function_impl *impl) > nir_imm_int(, 0)); > break; > } > + case nir_intrinsic_load_subgroup_all_mask: > +if (!b.shader->options->lower_subgroup_all_mask) > + break; > +/* flow through */ > case nir_intrinsic_load_subgroup_eq_mask: > case nir_intrinsic_load_subgroup_ge_mask: > case nir_intrinsic_load_subgroup_gt_mask: > @@ -111,6 +131,9 @@ opt_intrinsics_impl(nir_function_impl *impl) > nir_ssa_def *count =
[Mesa-dev] [PATCH 2/3] nir: Add an intrinsic for a bitmask of the whole subgroup
Similar to nir_intrinsic_load_subgroup_eq_mask and friends, this adds an intrinsic which contains a bit for every member of the group. This doesn’t have a corresponding GLSL builtin but it will be used to calculate nir_intrinsic_load_subgroup_g{t,e}_mask. It has its own nir option on whether to lower it. The idea is that this should be much easier to generate than the other masks because it will likely be a compile-time constant and if so it will generate more efficient code for the other masks. --- src/compiler/nir/nir.h| 1 + src/compiler/nir/nir_intrinsics.h | 1 + src/compiler/nir/nir_opt_intrinsics.c | 41 +++ src/intel/compiler/brw_compiler.c | 1 + src/intel/compiler/brw_fs_nir.cpp | 1 + 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index dd833cf..edb02b9 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1836,6 +1836,7 @@ typedef struct nir_shader_compiler_options { bool lower_extract_word; bool lower_vote_trivial; + bool lower_subgroup_all_mask; bool lower_subgroup_masks; /** diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index cefd18b..de362a8 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -350,6 +350,7 @@ SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx) SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx) SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx) SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_all_mask, 1, 0, xx, xx, xx) SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx) SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx) SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx) diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c index d5fdc51..71d79d7 100644 --- a/src/compiler/nir/nir_opt_intrinsics.c +++ b/src/compiler/nir/nir_opt_intrinsics.c @@ -29,23 +29,39 @@ */ static nir_ssa_def * -high_subgroup_mask(nir_builder *b, - nir_ssa_def *count, - uint64_t base_mask) +subgroup_all_mask(nir_builder *b, + nir_ssa_def *count) { /* group_mask could probably be calculated more efficiently but we want to * be sure not to shift by 64 if the subgroup size is 64 because the GLSL -* shift operator is undefined in that case. In any case if we were worried -* about efficency this should probably be done further down because the -* subgroup size is likely to be known at compile time. +* shift operator is undefined in that case. In any case if the driver is +* worried about efficency this should probably be done further down +* because the subgroup size is likely to be known at compile time. */ nir_ssa_def *subgroup_size = nir_load_subgroup_size(b); nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull); nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size); - nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift); - nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count); + return nir_ushr(b, all_bits, shift); +} - return nir_iand(b, higher_bits, group_mask); +static nir_ssa_def * +high_subgroup_mask(nir_builder *b, + nir_ssa_def *count, + uint64_t base_mask) +{ + nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count); + nir_intrinsic_instr *load_group_mask = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_load_subgroup_all_mask); + load_group_mask->num_components = 1; + nir_ssa_dest_init(_group_mask->instr, + _group_mask->dest, + 1 /* num_components */, + 64 /* bit_size */, + NULL /* name */); + nir_builder_instr_insert(b, _group_mask->instr); + + return nir_iand(b, higher_bits, _group_mask->dest.ssa); } static bool @@ -100,6 +116,10 @@ opt_intrinsics_impl(nir_function_impl *impl) nir_imm_int(, 0)); break; } + case nir_intrinsic_load_subgroup_all_mask: +if (!b.shader->options->lower_subgroup_all_mask) + break; +/* flow through */ case nir_intrinsic_load_subgroup_eq_mask: case nir_intrinsic_load_subgroup_ge_mask: case nir_intrinsic_load_subgroup_gt_mask: @@ -111,6 +131,9 @@ opt_intrinsics_impl(nir_function_impl *impl) nir_ssa_def *count = nir_load_subgroup_invocation(); switch (intrin->intrinsic) { +case nir_intrinsic_load_subgroup_all_mask: + replacement = subgroup_all_mask(, count); + break; case nir_intrinsic_load_subgroup_eq_mask: replacement = nir_ishl(, nir_imm_int64(, 1ull), count); break; diff --git