All 4 patches are: Reviewed-by: Iago Toral Quiroga <[email protected]>
On Tue, 2017-10-31 at 16:54 -0700, Jason Ekstrand wrote: > This commit pulls nir_lower_read_invocations_to_scalar along with > most > of the guts of nir_opt_intrinsics (which mostly does subgroup > lowering) > into a new nir_lower_subgroups pass. There are various other bits of > subgroup lowering that we're going to want to do so it makes a bit > more > sense to keep it all together in one pass. We also move it in i965 > to > happen after nir_lower_system_values to ensure that because we want > to > handle the subgroup mask system value intrinsics here. > > Reviewed-by: Iago Toral Quiroga <[email protected]> > --- > src/compiler/Makefile.sources | 2 +- > src/compiler/nir/meson.build | 2 +- > src/compiler/nir/nir.h | 12 +- > .../nir/nir_lower_read_invocation_to_scalar.c | 112 --------- > ---- > src/compiler/nir/nir_lower_subgroups.c | 184 > +++++++++++++++++++++ > src/compiler/nir/nir_opt_intrinsics.c | 71 +------- > src/intel/compiler/brw_compiler.c | 3 - > src/intel/compiler/brw_nir.c | 8 +- > 8 files changed, 208 insertions(+), 186 deletions(-) > delete mode 100644 > src/compiler/nir/nir_lower_read_invocation_to_scalar.c > create mode 100644 src/compiler/nir/nir_lower_subgroups.c > > diff --git a/src/compiler/Makefile.sources > b/src/compiler/Makefile.sources > index 27cc33a..12d932f 100644 > --- a/src/compiler/Makefile.sources > +++ b/src/compiler/Makefile.sources > @@ -233,11 +233,11 @@ NIR_FILES = \ > nir/nir_lower_passthrough_edgeflags.c \ > nir/nir_lower_patch_vertices.c \ > nir/nir_lower_phis_to_scalar.c \ > - nir/nir_lower_read_invocation_to_scalar.c \ > nir/nir_lower_regs_to_ssa.c \ > nir/nir_lower_returns.c \ > nir/nir_lower_samplers.c \ > nir/nir_lower_samplers_as_deref.c \ > + nir/nir_lower_subgroups.c \ > nir/nir_lower_system_values.c \ > nir/nir_lower_tex.c \ > nir/nir_lower_to_source_mods.c \ > diff --git a/src/compiler/nir/meson.build > b/src/compiler/nir/meson.build > index cb88eff..5c22bf6 100644 > --- a/src/compiler/nir/meson.build > +++ b/src/compiler/nir/meson.build > @@ -120,11 +120,11 @@ files_libnir = files( > 'nir_lower_passthrough_edgeflags.c', > 'nir_lower_patch_vertices.c', > 'nir_lower_phis_to_scalar.c', > - 'nir_lower_read_invocation_to_scalar.c', > 'nir_lower_regs_to_ssa.c', > 'nir_lower_returns.c', > 'nir_lower_samplers.c', > 'nir_lower_samplers_as_deref.c', > + 'nir_lower_subgroups.c', > 'nir_lower_system_values.c', > 'nir_lower_tex.c', > 'nir_lower_to_source_mods.c', > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > index dd833cf..8c3a20c 100644 > --- a/src/compiler/nir/nir.h > +++ b/src/compiler/nir/nir.h > @@ -1835,9 +1835,6 @@ typedef struct nir_shader_compiler_options { > bool lower_extract_byte; > bool lower_extract_word; > > - bool lower_vote_trivial; > - bool lower_subgroup_masks; > - > /** > * Does the driver support real 32-bit integers? (Otherwise, > integers > * are simulated by floats.) > @@ -2462,6 +2459,15 @@ bool nir_lower_samplers(nir_shader *shader, > bool nir_lower_samplers_as_deref(nir_shader *shader, > const struct gl_shader_program > *shader_program); > > +typedef struct nir_lower_subgroups_options { > + bool lower_to_scalar:1; > + bool lower_vote_trivial:1; > + bool lower_subgroup_masks:1; > +} nir_lower_subgroups_options; > + > +bool nir_lower_subgroups(nir_shader *shader, > + const nir_lower_subgroups_options > *options); > + > bool nir_lower_system_values(nir_shader *shader); > > typedef struct nir_lower_tex_options { > diff --git a/src/compiler/nir/nir_lower_read_invocation_to_scalar.c > b/src/compiler/nir/nir_lower_read_invocation_to_scalar.c > deleted file mode 100644 > index 69e7c0a..0000000 > --- a/src/compiler/nir/nir_lower_read_invocation_to_scalar.c > +++ /dev/null > @@ -1,112 +0,0 @@ > -/* > - * Copyright © 2017 Intel Corporation > - * > - * Permission is hereby granted, free of charge, to any person > obtaining a > - * copy of this software and associated documentation files (the > "Software"), > - * to deal in the Software without restriction, including without > limitation > - * the rights to use, copy, modify, merge, publish, distribute, > sublicense, > - * and/or sell copies of the Software, and to permit persons to whom > the > - * Software is furnished to do so, subject to the following > conditions: > - * > - * The above copyright notice and this permission notice (including > the next > - * paragraph) shall be included in all copies or substantial > portions of the > - * Software. > - * > - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > EXPRESS OR > - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL > - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES > OR OTHER > - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > OTHER DEALINGS > - * IN THE SOFTWARE. > - */ > - > -#include "nir.h" > -#include "nir_builder.h" > - > -/** @file nir_lower_read_invocation_to_scalar.c > - * > - * Replaces > nir_intrinsic_read_invocation/nir_intrinsic_read_first_invocation > - * operations with num_components != 1 with individual per-channel > operations. > - */ > - > -static void > -lower_read_invocation_to_scalar(nir_builder *b, nir_intrinsic_instr > *intrin) > -{ > - b->cursor = nir_before_instr(&intrin->instr); > - > - nir_ssa_def *value = nir_ssa_for_src(b, intrin->src[0], intrin- > >num_components); > - nir_ssa_def *reads[4]; > - > - for (unsigned i = 0; i < intrin->num_components; i++) { > - nir_intrinsic_instr *chan_intrin = > - nir_intrinsic_instr_create(b->shader, intrin->intrinsic); > - nir_ssa_dest_init(&chan_intrin->instr, &chan_intrin->dest, > - 1, intrin->dest.ssa.bit_size, NULL); > - chan_intrin->num_components = 1; > - > - /* value */ > - chan_intrin->src[0] = nir_src_for_ssa(nir_channel(b, value, > i)); > - /* invocation */ > - if (intrin->intrinsic == nir_intrinsic_read_invocation) > - nir_src_copy(&chan_intrin->src[1], &intrin->src[1], > chan_intrin); > - > - nir_builder_instr_insert(b, &chan_intrin->instr); > - > - reads[i] = &chan_intrin->dest.ssa; > - } > - > - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, > - nir_src_for_ssa(nir_vec(b, reads, > - intrin- > >num_components))); > - nir_instr_remove(&intrin->instr); > -} > - > -static bool > -nir_lower_read_invocation_to_scalar_impl(nir_function_impl *impl) > -{ > - bool progress = false; > - nir_builder b; > - nir_builder_init(&b, impl); > - > - nir_foreach_block(block, impl) { > - nir_foreach_instr_safe(instr, block) { > - if (instr->type != nir_instr_type_intrinsic) > - continue; > - > - nir_intrinsic_instr *intrin = > nir_instr_as_intrinsic(instr); > - > - if (intrin->num_components == 1) > - continue; > - > - switch (intrin->intrinsic) { > - case nir_intrinsic_read_invocation: > - case nir_intrinsic_read_first_invocation: > - lower_read_invocation_to_scalar(&b, intrin); > - progress = true; > - break; > - default: > - break; > - } > - } > - } > - > - if (progress) { > - nir_metadata_preserve(impl, nir_metadata_block_index | > - nir_metadata_dominance); > - } > - return progress; > -} > - > -bool > -nir_lower_read_invocation_to_scalar(nir_shader *shader) > -{ > - bool progress = false; > - > - nir_foreach_function(function, shader) { > - if (function->impl) > - progress |= > nir_lower_read_invocation_to_scalar_impl(function->impl); > - } > - > - return progress; > -} > diff --git a/src/compiler/nir/nir_lower_subgroups.c > b/src/compiler/nir/nir_lower_subgroups.c > new file mode 100644 > index 0000000..0d11dc9 > --- /dev/null > +++ b/src/compiler/nir/nir_lower_subgroups.c > @@ -0,0 +1,184 @@ > +/* > + * Copyright © 2017 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person > obtaining a > + * copy of this software and associated documentation files (the > "Software"), > + * to deal in the Software without restriction, including without > limitation > + * the rights to use, copy, modify, merge, publish, distribute, > sublicense, > + * and/or sell copies of the Software, and to permit persons to whom > the > + * Software is furnished to do so, subject to the following > conditions: > + * > + * The above copyright notice and this permission notice (including > the next > + * paragraph) shall be included in all copies or substantial > portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES > OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > OTHER DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include "nir.h" > +#include "nir_builder.h" > + > +/** > + * \file nir_opt_intrinsics.c > + */ > + > +static nir_ssa_def * > +lower_read_invocation_to_scalar(nir_builder *b, nir_intrinsic_instr > *intrin) > +{ > + /* This is safe to call on scalar things but it would be silly */ > + assert(intrin->dest.ssa.num_components > 1); > + > + nir_ssa_def *value = nir_ssa_for_src(b, intrin->src[0], > + intrin->num_components); > + nir_ssa_def *reads[4]; > + > + for (unsigned i = 0; i < intrin->num_components; i++) { > + nir_intrinsic_instr *chan_intrin = > + nir_intrinsic_instr_create(b->shader, intrin->intrinsic); > + nir_ssa_dest_init(&chan_intrin->instr, &chan_intrin->dest, > + 1, intrin->dest.ssa.bit_size, NULL); > + chan_intrin->num_components = 1; > + > + /* value */ > + chan_intrin->src[0] = nir_src_for_ssa(nir_channel(b, value, > i)); > + /* invocation */ > + if (intrin->intrinsic == nir_intrinsic_read_invocation) > + nir_src_copy(&chan_intrin->src[1], &intrin->src[1], > chan_intrin); > + > + nir_builder_instr_insert(b, &chan_intrin->instr); > + > + reads[i] = &chan_intrin->dest.ssa; > + } > + > + return nir_vec(b, reads, intrin->num_components); > +} > + > +static nir_ssa_def * > +high_subgroup_mask(nir_builder *b, > + nir_ssa_def *count, > + uint64_t base_mask) > +{ > + /* group_mask could probably be calculated more efficiently but > we want to > + * be sure not to shift by 64 if the subgroup size is 64 because > the GLSL > + * shift operator is undefined in that case. In any case if we > were worried > + * about efficency this should probably be done further down > because the > + * subgroup size is likely to be known at compile time. > + */ > + nir_ssa_def *subgroup_size = nir_load_subgroup_size(b); > + nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull); > + nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), > subgroup_size); > + nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift); > + nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, > base_mask), count); > + > + return nir_iand(b, higher_bits, group_mask); > +} > + > +static nir_ssa_def * > +lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin, > + const nir_lower_subgroups_options *options) > +{ > + switch (intrin->intrinsic) { > + case nir_intrinsic_vote_any: > + case nir_intrinsic_vote_all: > + if (options->lower_vote_trivial) > + return nir_ssa_for_src(b, intrin->src[0], 1); > + break; > + > + case nir_intrinsic_vote_eq: > + if (options->lower_vote_trivial) > + return nir_imm_int(b, NIR_TRUE); > + break; > + > + case nir_intrinsic_read_invocation: > + case nir_intrinsic_read_first_invocation: > + if (options->lower_to_scalar && intrin->num_components > 1) > + return lower_read_invocation_to_scalar(b, intrin); > + break; > + > + case nir_intrinsic_load_subgroup_eq_mask: > + case nir_intrinsic_load_subgroup_ge_mask: > + case nir_intrinsic_load_subgroup_gt_mask: > + case nir_intrinsic_load_subgroup_le_mask: > + case nir_intrinsic_load_subgroup_lt_mask: { > + if (!options->lower_subgroup_masks) > + return NULL; > + > + nir_ssa_def *count = nir_load_subgroup_invocation(b); > + > + switch (intrin->intrinsic) { > + case nir_intrinsic_load_subgroup_eq_mask: > + return nir_ishl(b, nir_imm_int64(b, 1ull), count); > + case nir_intrinsic_load_subgroup_ge_mask: > + return high_subgroup_mask(b, count, ~0ull); > + case nir_intrinsic_load_subgroup_gt_mask: > + return high_subgroup_mask(b, count, ~1ull); > + case nir_intrinsic_load_subgroup_le_mask: > + return nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), > count)); > + case nir_intrinsic_load_subgroup_lt_mask: > + return nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), > count)); > + default: > + unreachable("you seriously can't tell this is > unreachable?"); > + } > + break; > + } > + default: > + break; > + } > + > + return NULL; > +} > + > +static bool > +lower_subgroups_impl(nir_function_impl *impl, > + const nir_lower_subgroups_options *options) > +{ > + nir_builder b; > + nir_builder_init(&b, impl); > + bool progress = false; > + > + nir_foreach_block(block, impl) { > + nir_foreach_instr_safe(instr, block) { > + if (instr->type != nir_instr_type_intrinsic) > + continue; > + > + nir_intrinsic_instr *intrin = > nir_instr_as_intrinsic(instr); > + b.cursor = nir_before_instr(instr); > + > + nir_ssa_def *lower = lower_subgroups_intrin(&b, intrin, > options); > + if (!lower) > + continue; > + > + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, > nir_src_for_ssa(lower)); > + nir_instr_remove(instr); > + progress = true; > + } > + } > + > + return progress; > +} > + > +bool > +nir_lower_subgroups(nir_shader *shader, > + const nir_lower_subgroups_options *options) > +{ > + bool progress = false; > + > + nir_foreach_function(function, shader) { > + if (!function->impl) > + continue; > + > + if (lower_subgroups_impl(function->impl, options)) { > + progress = true; > + nir_metadata_preserve(function->impl, > nir_metadata_block_index | > + nir_metadata_dominanc > e); > + } > + } > + > + return progress; > +} > diff --git a/src/compiler/nir/nir_opt_intrinsics.c > b/src/compiler/nir/nir_opt_intrinsics.c > index d5fdc51..98c8b1a 100644 > --- a/src/compiler/nir/nir_opt_intrinsics.c > +++ b/src/compiler/nir/nir_opt_intrinsics.c > @@ -28,26 +28,6 @@ > * \file nir_opt_intrinsics.c > */ > > -static nir_ssa_def * > -high_subgroup_mask(nir_builder *b, > - nir_ssa_def *count, > - uint64_t base_mask) > -{ > - /* group_mask could probably be calculated more efficiently but > we want to > - * be sure not to shift by 64 if the subgroup size is 64 because > the GLSL > - * shift operator is undefined in that case. In any case if we > were worried > - * about efficency this should probably be done further down > because the > - * subgroup size is likely to be known at compile time. > - */ > - nir_ssa_def *subgroup_size = nir_load_subgroup_size(b); > - nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull); > - nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), > subgroup_size); > - nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift); > - nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, > base_mask), count); > - > - return nir_iand(b, higher_bits, group_mask); > -} > - > static bool > opt_intrinsics_impl(nir_function_impl *impl) > { > @@ -66,22 +46,14 @@ opt_intrinsics_impl(nir_function_impl *impl) > > switch (intrin->intrinsic) { > case nir_intrinsic_vote_any: > - case nir_intrinsic_vote_all: { > - nir_const_value *val = nir_src_as_const_value(intrin- > >src[0]); > - if (!val && !b.shader->options->lower_vote_trivial) > - continue; > - > - replacement = nir_ssa_for_src(&b, intrin->src[0], 1); > + case nir_intrinsic_vote_all: > + if (nir_src_as_const_value(intrin->src[0])) > + replacement = nir_ssa_for_src(&b, intrin->src[0], 1); > break; > - } > - case nir_intrinsic_vote_eq: { > - nir_const_value *val = nir_src_as_const_value(intrin- > >src[0]); > - if (!val && !b.shader->options->lower_vote_trivial) > - continue; > - > - replacement = nir_imm_int(&b, NIR_TRUE); > + case nir_intrinsic_vote_eq: > + if (nir_src_as_const_value(intrin->src[0])) > + replacement = nir_imm_int(&b, NIR_TRUE); > break; > - } > case nir_intrinsic_ballot: { > assert(b.shader->options->max_subgroup_size != 0); > if (b.shader->options->max_subgroup_size > 32 || > @@ -100,37 +72,6 @@ opt_intrinsics_impl(nir_function_impl *impl) > nir_imm_int(&b, > 0)); > break; > } > - case nir_intrinsic_load_subgroup_eq_mask: > - case nir_intrinsic_load_subgroup_ge_mask: > - case nir_intrinsic_load_subgroup_gt_mask: > - case nir_intrinsic_load_subgroup_le_mask: > - case nir_intrinsic_load_subgroup_lt_mask: { > - if (!b.shader->options->lower_subgroup_masks) > - break; > - > - nir_ssa_def *count = nir_load_subgroup_invocation(&b); > - > - switch (intrin->intrinsic) { > - case nir_intrinsic_load_subgroup_eq_mask: > - replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), > count); > - break; > - case nir_intrinsic_load_subgroup_ge_mask: > - replacement = high_subgroup_mask(&b, count, ~0ull); > - break; > - case nir_intrinsic_load_subgroup_gt_mask: > - replacement = high_subgroup_mask(&b, count, ~1ull); > - break; > - case nir_intrinsic_load_subgroup_le_mask: > - replacement = nir_inot(&b, nir_ishl(&b, > nir_imm_int64(&b, ~1ull), count)); > - break; > - case nir_intrinsic_load_subgroup_lt_mask: > - replacement = nir_inot(&b, nir_ishl(&b, > nir_imm_int64(&b, ~0ull), count)); > - break; > - default: > - unreachable("you seriously can't tell this is > unreachable?"); > - } > - break; > - } > default: > break; > } > diff --git a/src/intel/compiler/brw_compiler.c > b/src/intel/compiler/brw_compiler.c > index 2f6af7d..a6129e9 100644 > --- a/src/intel/compiler/brw_compiler.c > +++ b/src/intel/compiler/brw_compiler.c > @@ -57,7 +57,6 @@ static const struct nir_shader_compiler_options > scalar_nir_options = { > .lower_unpack_snorm_4x8 = true, > .lower_unpack_unorm_2x16 = true, > .lower_unpack_unorm_4x8 = true, > - .lower_subgroup_masks = true, > .max_subgroup_size = 32, > .max_unroll_iterations = 32, > }; > @@ -80,7 +79,6 @@ static const struct nir_shader_compiler_options > vector_nir_options = { > .lower_unpack_unorm_2x16 = true, > .lower_extract_byte = true, > .lower_extract_word = true, > - .lower_vote_trivial = true, > .max_unroll_iterations = 32, > }; > > @@ -99,7 +97,6 @@ static const struct nir_shader_compiler_options > vector_nir_options_gen6 = { > .lower_unpack_unorm_2x16 = true, > .lower_extract_byte = true, > .lower_extract_word = true, > - .lower_vote_trivial = true, > .max_unroll_iterations = 32, > }; > > diff --git a/src/intel/compiler/brw_nir.c > b/src/intel/compiler/brw_nir.c > index e5ff6de..f599f74 100644 > --- a/src/intel/compiler/brw_nir.c > +++ b/src/intel/compiler/brw_nir.c > @@ -620,7 +620,6 @@ brw_preprocess_nir(const struct brw_compiler > *compiler, nir_shader *nir) > > OPT(nir_lower_tex, &tex_options); > OPT(nir_normalize_cubemap_coords); > - OPT(nir_lower_read_invocation_to_scalar); > > OPT(nir_lower_global_vars_to_local); > > @@ -637,6 +636,13 @@ brw_preprocess_nir(const struct brw_compiler > *compiler, nir_shader *nir) > > OPT(nir_lower_system_values); > > + const nir_lower_subgroups_options subgroups_options = { > + .lower_to_scalar = true, > + .lower_subgroup_masks = true, > + .lower_vote_trivial = !is_scalar, > + }; > + OPT(nir_lower_subgroups, &subgroups_options); > + > OPT(nir_lower_clip_cull_distance_arrays); > > nir_variable_mode indirect_mask = 0; _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
