On Fri, Mar 16, 2018 at 2:50 AM, Daniel Schürmann < [email protected]> wrote:
> Signed-off-by: Daniel Schürmann <[email protected]> > --- > src/compiler/nir/nir.h | 1 + > src/compiler/nir/nir_lower_subgroups.c | 83 > +++++++++++++++++++++++++++------- > 2 files changed, 67 insertions(+), 17 deletions(-) > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > index 6a51b7c4ab..0e3c026efa 100644 > --- a/src/compiler/nir/nir.h > +++ b/src/compiler/nir/nir.h > @@ -2564,6 +2564,7 @@ typedef struct nir_lower_subgroups_options { > bool lower_vote_eq_to_ballot:1; > bool lower_subgroup_masks:1; > bool lower_shuffle:1; > + bool lower_shuffle_to_32bit:1; > bool lower_quad:1; > } nir_lower_subgroups_options; > > diff --git a/src/compiler/nir/nir_lower_subgroups.c > b/src/compiler/nir/nir_lower_subgroups.c > index 9dc7be7947..669168e830 100644 > --- a/src/compiler/nir/nir_lower_subgroups.c > +++ b/src/compiler/nir/nir_lower_subgroups.c > @@ -28,6 +28,37 @@ > * \file nir_opt_intrinsics.c > */ > > +static nir_intrinsic_instr > *ac_lower_subgroups_64bit_split_intrinsic(nir_builder > *b, nir_intrinsic_instr *intrin, unsigned int component) { > Please put "static nir_intrinsic_instr *" and the "{" each on their own line and wrap things so that we don't go over 80 characters. Also, please drop the ac_ prefix as this is no longer in radv code. > + nir_ssa_def *comp; > + if (component == 0) > + comp = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa); > + else > + comp = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa); > + > + nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader, > intrin->intrinsic); > + nir_ssa_dest_init(&intr->instr, &intr->dest, 1, 32, NULL); > + intr->src[0] = nir_src_for_ssa(comp); > + > + intr->const_index[0] = intrin->const_index[0]; > + intr->const_index[1] = intrin->const_index[1]; > + if (intrin->intrinsic == nir_intrinsic_read_invocation || > + intrin->intrinsic == nir_intrinsic_shuffle || > + intrin->intrinsic == nir_intrinsic_quad_broadcast) { > You can use nir_intrinsic_infos[intrin->intrinsic].num_srcs to make this a bit more general. > + nir_src_copy(&intr->src[1], &intrin->src[1], intr); > + } > + intr->num_components = 1; > + nir_builder_instr_insert(b, &intr->instr); > + return intr; > +} > + > +static nir_ssa_def * > +lower_64bit_to_32bit(nir_builder *b, nir_intrinsic_instr *intrin) { "{" goes on it's own line. Also, how about "lower_subgroup_op_to_32bit" to match "lower_subgroup_op_to_scalar" below. > + assert(intrin->src[0].ssa->bit_size == 64); > + nir_intrinsic_instr *intr_x = ac_lower_subgroups_64bit_split_intrinsic(b, > intrin, 0); > + nir_intrinsic_instr *intr_y = ac_lower_subgroups_64bit_split_intrinsic(b, > intrin, 1); > + return nir_pack_64_2x32_split(b, &intr_x->dest.ssa, &intr_y->dest.ssa); > +} > + > static nir_ssa_def * > ballot_type_to_uint(nir_builder *b, nir_ssa_def *value, unsigned > bit_size) > { > @@ -80,7 +111,8 @@ uint_to_ballot_type(nir_builder *b, nir_ssa_def *value, > } > > static nir_ssa_def * > -lower_subgroup_op_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin) > +lower_subgroup_op_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin, > + bool lower_shuffle_to_32bit) > Just call this lower_to_32bit as it doesn't necessarily have anything to do with shuffles. Also, please align the parameter to the ( > { > /* This is safe to call on scalar things but it would be silly */ > assert(intrin->dest.ssa.num_components > 1); > @@ -107,9 +139,12 @@ lower_subgroup_op_to_scalar(nir_builder *b, > nir_intrinsic_instr *intrin) > chan_intrin->const_index[0] = intrin->const_index[0]; > chan_intrin->const_index[1] = intrin->const_index[1]; > > - nir_builder_instr_insert(b, &chan_intrin->instr); > - > - reads[i] = &chan_intrin->dest.ssa; > + if (lower_shuffle_to_32bit && chan_intrin->src[0].ssa->bit_size == > 64) { > + reads[i] = lower_64bit_to_32bit(b, chan_intrin); > + } else { > + nir_builder_instr_insert(b, &chan_intrin->instr); > + reads[i] = &chan_intrin->dest.ssa; > + } > } > > return nir_vec(b, reads, intrin->num_components); > @@ -158,13 +193,19 @@ lower_vote_eq_to_ballot(nir_builder *b, > nir_intrinsic_instr *intrin, > 1, value->bit_size, NULL); > rfi->num_components = 1; > rfi->src[0] = nir_src_for_ssa(nir_channel(b, value, i)); > - nir_builder_instr_insert(b, &rfi->instr); > + nir_ssa_def *first_lane; > + if (options->lower_shuffle_to_32bit && rfi->src[0].ssa->bit_size > == 64) { > I don't really see how read_first_invocation is related to shuffles > + first_lane = lower_64bit_to_32bit(b, rfi); > + } else { > + nir_builder_instr_insert(b, &rfi->instr); > + first_lane = &rfi->dest.ssa; > + } > > nir_ssa_def *is_eq; > if (intrin->intrinsic == nir_intrinsic_vote_feq) { > - is_eq = nir_feq(b, &rfi->dest.ssa, nir_channel(b, value, i)); > + is_eq = nir_feq(b, first_lane, nir_channel(b, value, i)); > } else { > - is_eq = nir_ieq(b, &rfi->dest.ssa, nir_channel(b, value, i)); > + is_eq = nir_ieq(b, first_lane, nir_channel(b, value, i)); > } > > if (all_eq == NULL) { > @@ -188,7 +229,7 @@ lower_vote_eq_to_ballot(nir_builder *b, > nir_intrinsic_instr *intrin, > > static nir_ssa_def * > lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin, > - bool lower_to_scalar) > + const nir_lower_subgroups_options *options) > Please align to the ( > { > nir_ssa_def *index = nir_load_subgroup_invocation(b); > switch (intrin->intrinsic) { > @@ -240,8 +281,10 @@ lower_shuffle(nir_builder *b, nir_intrinsic_instr > *intrin, > intrin->dest.ssa.num_components, > intrin->dest.ssa.bit_size, NULL); > > - if (lower_to_scalar && shuffle->num_components > 1) { > - return lower_subgroup_op_to_scalar(b, shuffle); > + if (options->lower_to_scalar && shuffle->num_components > 1) { > + return lower_subgroup_op_to_scalar(b, shuffle, > options->lower_shuffle_to_32bit); > + } else if (options->lower_shuffle_to_32bit && > shuffle->src[0].ssa->bit_size == 64) { > + return lower_64bit_to_32bit(b, shuffle); > } else { > nir_builder_instr_insert(b, &shuffle->instr); > return &shuffle->dest.ssa; > @@ -279,7 +322,9 @@ lower_subgroups_intrin(nir_builder *b, > nir_intrinsic_instr *intrin, > case nir_intrinsic_read_invocation: > case nir_intrinsic_read_first_invocation: > if (options->lower_to_scalar && intrin->num_components > 1) > - return lower_subgroup_op_to_scalar(b, intrin); > + return lower_subgroup_op_to_scalar(b, intrin, > options->lower_shuffle_to_32bit); > + else if (options->lower_shuffle_to_32bit && > intrin->src[0].ssa->bit_size == 64) > + return lower_64bit_to_32bit(b, intrin); > break; > > case nir_intrinsic_load_subgroup_eq_mask: > @@ -400,16 +445,18 @@ lower_subgroups_intrin(nir_builder *b, > nir_intrinsic_instr *intrin, > > case nir_intrinsic_shuffle: > if (options->lower_to_scalar && intrin->num_components > 1) > - return lower_subgroup_op_to_scalar(b, intrin); > + return lower_subgroup_op_to_scalar(b, intrin, > options->lower_shuffle_to_32bit); > + else if (options->lower_shuffle_to_32bit && > intrin->src[0].ssa->bit_size == 64) > This needs to be dedented. > + return lower_64bit_to_32bit(b, intrin); > break; > > case nir_intrinsic_shuffle_xor: > case nir_intrinsic_shuffle_up: > case nir_intrinsic_shuffle_down: > if (options->lower_shuffle) > - return lower_shuffle(b, intrin, options->lower_to_scalar); > + return lower_shuffle(b, intrin, options); > else if (options->lower_to_scalar && intrin->num_components > 1) > - return lower_subgroup_op_to_scalar(b, intrin); > + return lower_subgroup_op_to_scalar(b, intrin, > options->lower_shuffle_to_32bit); > I think you need an "else if (options->lower_shuffle_to_32bit && intrin->src[0].ssa->bit_size == 64)" case here as well. > break; > > case nir_intrinsic_quad_broadcast: > @@ -417,16 +464,18 @@ lower_subgroups_intrin(nir_builder *b, > nir_intrinsic_instr *intrin, > case nir_intrinsic_quad_swap_vertical: > case nir_intrinsic_quad_swap_diagonal: > if (options->lower_quad) > - return lower_shuffle(b, intrin, options->lower_to_scalar); > + return lower_shuffle(b, intrin, options); > else if (options->lower_to_scalar && intrin->num_components > 1) > - return lower_subgroup_op_to_scalar(b, intrin); > + return lower_subgroup_op_to_scalar(b, intrin, > options->lower_shuffle_to_32bit); > + else if (options->lower_shuffle_to_32bit && > intrin->src[0].ssa->bit_size == 64) > dedent, please. > + return lower_64bit_to_32bit(b, intrin); > break; > > case nir_intrinsic_reduce: > case nir_intrinsic_inclusive_scan: > case nir_intrinsic_exclusive_scan: > if (options->lower_to_scalar && intrin->num_components > 1) > - return lower_subgroup_op_to_scalar(b, intrin); > + return lower_subgroup_op_to_scalar(b, intrin, false); > break; > > default: > -- > 2.14.1 > > _______________________________________________ > mesa-dev mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
