On Mon, Oct 30, 2017 at 11:53 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> On Mon, Oct 30, 2017 at 5:10 AM, Iago Toral <ito...@igalia.com> wrote: > >> On Wed, 2017-10-25 at 16:26 -0700, Jason Ekstrand wrote: >> > Ballot intrinsics return a bitfield of subgroups. In GLSL and some >> > SPIR-V extensions, they return a uint64_t. In SPV_KHR_shader_ballot, >> > they return a uvec4. Also, some back-ends would rather pass around >> > 32-bit values because it's easier than messing with 64-bit all the >> > time. >> > To solve this mess, we make nir_lower_subgroups take a new parameter >> > called ballot_bit_size and it lowers whichever thing it gets in from >> > the >> > source language (uint64_t or uvec4) to a scalar with the specified >> > number of bits. This replaces a chunk of the old lowering code. >> > >> > Reviewed-by: Lionel Landwerlin <lionel.g.landwer...@intel.com> >> > --- >> > src/compiler/nir/nir.h | 3 +- >> > src/compiler/nir/nir_lower_subgroups.c | 101 >> > +++++++++++++++++++++++++++++++-- >> > src/compiler/nir/nir_opt_intrinsics.c | 18 ------ >> > src/intel/compiler/brw_compiler.c | 1 - >> > src/intel/compiler/brw_nir.c | 1 + >> > 5 files changed, 98 insertions(+), 26 deletions(-) >> > >> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h >> > index 1a25d7b..563b57f 100644 >> > --- a/src/compiler/nir/nir.h >> > +++ b/src/compiler/nir/nir.h >> > @@ -1854,8 +1854,6 @@ typedef struct nir_shader_compiler_options { >> > */ >> > bool use_interpolated_input_intrinsics; >> > >> > - unsigned max_subgroup_size; >> > - >> > unsigned max_unroll_iterations; >> > } nir_shader_compiler_options; >> > >> > @@ -2469,6 +2467,7 @@ bool nir_lower_samplers_as_deref(nir_shader >> > *shader, >> > const struct gl_shader_program >> > *shader_program); >> > >> > typedef struct nir_lower_subgroups_options { >> > + uint8_t ballot_bit_size; >> > bool lower_to_scalar:1; >> > bool lower_vote_trivial:1; >> > bool lower_subgroup_masks:1; >> > diff --git a/src/compiler/nir/nir_lower_subgroups.c >> > b/src/compiler/nir/nir_lower_subgroups.c >> > index 02738c4..1969740 100644 >> > --- a/src/compiler/nir/nir_lower_subgroups.c >> > +++ b/src/compiler/nir/nir_lower_subgroups.c >> > @@ -28,6 +28,43 @@ >> > * \file nir_opt_intrinsics.c >> > */ >> > >> > +/* Converts a uint32_t or uint64_t value to uint64_t or uvec4 */ >> > +static nir_ssa_def * >> > +uint_to_ballot_type(nir_builder *b, nir_ssa_def *value, >> > + unsigned num_components, unsigned bit_size, >> > + uint32_t extend_val) >> > +{ >> > + assert(value->num_components == 1); >> > + assert(value->bit_size == 32 || value->bit_size == 64); >> > + >> > + nir_ssa_def *extend = nir_imm_int(b, extend_val); >> >> Is it required that we do this extension? would it be incorrect if we >> extended with 0's? >> > > Thanks for making me look at that. The Vulkan spec requires that they be > set to 0. I guess I need to go rework some things. :) > I did a bit more looking and things here are thoroughly insane. I've filed CTS and spec bugs. Let's see where they go before I rewrite anything. > > + if (num_components > 1) { >> > + /* SPIR-V uses a uvec4 for ballot values */ >> > + assert(num_components == 4); >> > + assert(bit_size == 32); >> > + >> > + if (value->bit_size == 32) { >> > + return nir_vec4(b, value, extend, extend, extend); >> > + } else { >> > + assert(value->bit_size == 64); >> > + return nir_vec4(b, nir_unpack_64_2x32_split_x(b, value), >> > + nir_unpack_64_2x32_split_y(b, value), >> > + extend, extend); >> > + } >> > + } else { >> > + /* GLSL uses a uint64_t for ballot values */ >> > + assert(num_components == 1); >> > + assert(bit_size == 64); >> > + >> > + if (value->bit_size == 32) { >> > + return nir_pack_64_2x32_split(b, value, extend); >> > + } else { >> > + assert(value->bit_size == 64); >> > + return value; >> > + } >> > + } >> > +} >> > + >> > static nir_ssa_def * >> > lower_read_invocation_to_scalar(nir_builder *b, nir_intrinsic_instr >> > *intrin) >> > { >> > @@ -86,24 +123,78 @@ lower_subgroups_intrin(nir_builder *b, >> > nir_intrinsic_instr *intrin, >> > if (!options->lower_subgroup_masks) >> > return NULL; >> > >> > + uint64_t mask; >> > + switch (intrin->intrinsic) { >> > + case nir_intrinsic_load_subgroup_eq_mask: >> > + mask = 1ull; >> > + break; >> > + case nir_intrinsic_load_subgroup_ge_mask: >> > + case nir_intrinsic_load_subgroup_lt_mask: >> > + mask = ~0ull; >> > + break; >> > + case nir_intrinsic_load_subgroup_gt_mask: >> > + case nir_intrinsic_load_subgroup_le_mask: >> > + mask = ~1ull; >> > + break; >> > + default: >> > + unreachable("you seriously can't tell this is >> > unreachable?"); >> > + } >> > + >> > nir_ssa_def *count = nir_load_subgroup_invocation(b); >> > + nir_ssa_def *shifted; >> > + if (options->ballot_bit_size == 32 && intrin- >> > >dest.ssa.bit_size == 32) { >> >> Maybe add a comment here stating that this is the SPIR-V path where we >> always expect uvec4 results. >> > > Sure. > > >> > + assert(intrin->dest.ssa.num_components == 4); >> > + shifted = nir_ishl(b, nir_imm_int(b, mask), count); >> > + } else { >> > + /* We're either working with 64-bit types natively or we're >> > in OpenGL >> > + * where we want a uint64_t as our final value. In either >> > case we >> > + * know that we have 64-bit types. In the first case, we >> > need to use >> > + * 64 bits because of the native subgroup size. In the >> > second, we >> > + * want a 64-bit result and a 64-bit shift is likely more >> > efficient >> > + * than messing around with 32-bit shifts and packing. >> > + */ >> > + assert(options->ballot_bit_size == 64 || >> > + intrin->dest.ssa.bit_size == 64); >> > + shifted = nir_ishl(b, nir_imm_int64(b, mask), count); >> > + } >> >> >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev