--- src/amd/common/ac_lower_subgroups.c | 50 ++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-)
diff --git a/src/amd/common/ac_lower_subgroups.c b/src/amd/common/ac_lower_subgroups.c
index d0782b481b..2be48e2ba1 100644
--- a/src/amd/common/ac_lower_subgroups.c
+++ b/src/amd/common/ac_lower_subgroups.c
@@ -26,9 +26,45 @@
 
 #include "ac_nir_to_llvm.h"
 
+static nir_ssa_def *ac_lower_subgroups_64bit(nir_builder *b, nir_intrinsic_instr *intrin) { /* lower one 64-bit subgroup intrinsic into two 32-bit copies of it */
+	assert(intrin->src[0].ssa->bit_size == 64); /* callers must only pass intrinsics whose src[0] is 64 bits wide */
+	nir_ssa_def * x = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa); /* split src[0] into two 32-bit halves: x ... */
+	nir_ssa_def * y = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa); /* ... and y */
+	nir_intrinsic_instr *intr_x = nir_intrinsic_instr_create(b->shader, intrin->intrinsic); /* same opcode as the original, one instance per half */
+	nir_intrinsic_instr *intr_y = nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
+	nir_ssa_dest_init(&intr_x->instr, &intr_x->dest, 1, 32, NULL); /* each half produces a 1-component, 32-bit result */
+	nir_ssa_dest_init(&intr_y->instr, &intr_y->dest, 1, 32, NULL);
+	intr_x->src[0] = nir_src_for_ssa(x);
+	intr_y->src[0] = nir_src_for_ssa(y);
+	intr_x->const_index[0] = intr_y->const_index[0] = intrin->const_index[0]; /* carry the original's first two const indices over to both halves */
+	intr_x->const_index[1] = intr_y->const_index[1] = intrin->const_index[1];
+	if (intrin->intrinsic == nir_intrinsic_read_invocation || /* for these three intrinsics src[1] of the original is copied unchanged to both halves */
+	    intrin->intrinsic == nir_intrinsic_shuffle ||
+	    intrin->intrinsic == nir_intrinsic_quad_broadcast) {
+		nir_src_copy(&intr_x->src[1], &intrin->src[1], intr_x);
+		nir_src_copy(&intr_y->src[1], &intrin->src[1], intr_y);
+	}
+	intr_x->num_components = 1;
+	intr_y->num_components = 1;
+	nir_builder_instr_insert(b, &intr_x->instr); /* only the two new instructions are inserted; 'intrin' itself is not touched here */
+	nir_builder_instr_insert(b, &intr_y->instr);
+	return nir_pack_64_2x32_split(b, &intr_x->dest.ssa, &intr_y->dest.ssa); /* recombine the two 32-bit results into one 64-bit value */
+}
+
 static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin)
 {
 	switch(intrin->intrinsic) {
+	case nir_intrinsic_read_invocation:
+	case nir_intrinsic_read_first_invocation:
+	case nir_intrinsic_shuffle:
+	case nir_intrinsic_quad_broadcast:
+	case nir_intrinsic_quad_swap_horizontal:
+	case nir_intrinsic_quad_swap_vertical:
+	case nir_intrinsic_quad_swap_diagonal:
+		if (intrin->src[0].ssa->bit_size == 64) /* only 64-bit sources are split; any other bit size returns NULL */
+			return ac_lower_subgroups_64bit(b, intrin);
+		else
+			return NULL;
 	case nir_intrinsic_vote_ieq:
 	case nir_intrinsic_vote_feq: {
 		nir_intrinsic_instr *rfi =
@@ -37,12 +73,18 @@ static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, nir_intrinsic_inst
 				  1, intrin->src[0].ssa->bit_size, NULL);
 		nir_src_copy(&rfi->src[0], &intrin->src[0], rfi);
 		rfi->num_components = 1;
-
+		nir_ssa_def *first_lane; /* result of the read_first_invocation built above, whichever path emits it */
+		if (intrin->src[0].ssa->bit_size == 64) {
+			first_lane = ac_lower_subgroups_64bit(b, rfi); /* rfi serves only as a template here: the helper inserts its own two 32-bit copies and rfi is never inserted */
+		} else {
+			nir_builder_instr_insert(b, &rfi->instr);
+			first_lane = &rfi->dest.ssa;
+		}
 		nir_ssa_def *is_ne;
 		if (intrin->intrinsic == nir_intrinsic_vote_feq)
-			is_ne = nir_fne(b, &rfi->dest.ssa, intrin->src[0].ssa);
+			is_ne = nir_fne(b, first_lane, intrin->src[0].ssa);
 		else
-			is_ne = nir_ine(b, &rfi->dest.ssa, intrin->src[0].ssa);
+			is_ne = nir_ine(b, first_lane, intrin->src[0].ssa);
 
 		nir_intrinsic_instr *ballot =
 			nir_intrinsic_instr_create(b->shader, nir_intrinsic_ballot);
@@ -50,7 +92,7 @@ static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, nir_intrinsic_inst
 				  1, 64, NULL);
 		ballot->src[0] = nir_src_for_ssa(is_ne);
 		ballot->num_components = 1;
-
+		nir_builder_instr_insert(b, &ballot->instr); /* insert the ballot before its result is compared against zero below */
 		return nir_ieq(b, &ballot->dest.ssa, nir_imm_int64(b, 0));
 	}
 	default:
-- 
2.14.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev