Re: [Mesa-dev] [PATCH 3/7] ac: lower 64bit subgroup intrinsics

2018-03-08 Thread Michael Schellenberger Costa

Hi Daniel,


Am 08.03.2018 um 18:10 schrieb Daniel Schürmann:

---
  src/amd/common/ac_lower_subgroups.c | 50 ++---
  1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_lower_subgroups.c 
b/src/amd/common/ac_lower_subgroups.c
index d0782b481b..2be48e2ba1 100644
--- a/src/amd/common/ac_lower_subgroups.c
+++ b/src/amd/common/ac_lower_subgroups.c
@@ -26,9 +26,45 @@
  
  #include "ac_nir_to_llvm.h"
  
+static nir_ssa_def *ac_lower_subgroups_64bit(nir_builder *b, nir_intrinsic_instr *intrin) {

+   assert(intrin->src[0].ssa->bit_size == 64);
+   nir_ssa_def * x = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
+   nir_ssa_def * y = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);

The extra space before x/y looks wrong.

+   nir_intrinsic_instr *intr_x = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
+   nir_intrinsic_instr *intr_y = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
+   nir_ssa_dest_init(&intr_x->instr, &intr_x->dest, 1, 32, NULL);
+   nir_ssa_dest_init(&intr_y->instr, &intr_y->dest, 1, 32, NULL);
+   intr_x->src[0] = nir_src_for_ssa(x);
+   intr_y->src[0] = nir_src_for_ssa(y);
+   intr_x->const_index[0] = intr_y->const_index[0] = 
intrin->const_index[0];
+   intr_x->const_index[1] = intr_y->const_index[1] = 
intrin->const_index[1];
+   if (intrin->intrinsic == nir_intrinsic_read_invocation ||
+   intrin->intrinsic == nir_intrinsic_shuffle ||
+   intrin->intrinsic == nir_intrinsic_quad_broadcast) {

Indentation is off for the other conditions.

+   nir_src_copy(&intr_x->src[1], &intrin->src[1], intr_x);
+   nir_src_copy(&intr_y->src[1], &intrin->src[1], intr_y);
+   }
+   intr_x->num_components = 1;
+   intr_y->num_components = 1;
+   nir_builder_instr_insert(b, &intr_x->instr);
+   nir_builder_instr_insert(b, &intr_y->instr);
+   return nir_pack_64_2x32_split(b, &intr_x->dest.ssa, &intr_y->dest.ssa);
+}


That said, could you make a helper function:

static nir_intrinsic_instr 
*ac_lower_subgroups_64bit_split_intrinsic(nir_builder *b, nir_intrinsic_instr 
*intrin, unsigned int component) {
nir_ssa_def *comp;
if (component == 0)
comp = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
else
    comp = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);

nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
nir_ssa_dest_init(&intr->instr, &intr->dest, 1, 32, NULL);
intr->src[0] = nir_src_for_ssa(comp);

intr->const_index[0] = intrin->const_index[0];
intr->const_index[1] = intrin->const_index[1];
if (intrin->intrinsic == nir_intrinsic_read_invocation ||
intrin->intrinsic == nir_intrinsic_shuffle ||
intrin->intrinsic == nir_intrinsic_quad_broadcast) {
nir_src_copy(&intr->src[1], &intrin->src[1], intr);
}
intr->num_components = 1;
return intr;
}

And then simplify into:

static nir_ssa_def *ac_lower_subgroups_64bit(nir_builder *b, 
nir_intrinsic_instr *intrin) {
assert(intrin->src[0].ssa->bit_size == 64);
nir_intrinsic_instr *intr_x = 
ac_lower_subgroups_64bit_split_intrinsic(b, intrin, 0);
nir_intrinsic_instr *intr_y = 
ac_lower_subgroups_64bit_split_intrinsic(b, intrin, 1);

nir_builder_instr_insert(b, &intr_x->instr);
nir_builder_instr_insert(b, &intr_y->instr);
return nir_pack_64_2x32_split(b, &intr_x->dest.ssa, &intr_y->dest.ssa);
}

--Michael


+
  static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, 
nir_intrinsic_instr *intrin)
  {
switch(intrin->intrinsic) {
+   case nir_intrinsic_read_invocation:
+   case nir_intrinsic_read_first_invocation:
+   case nir_intrinsic_shuffle:
+   case nir_intrinsic_quad_broadcast:
+   case nir_intrinsic_quad_swap_horizontal:
+   case nir_intrinsic_quad_swap_vertical:
+   case nir_intrinsic_quad_swap_diagonal:
+   if (intrin->src[0].ssa->bit_size == 64)
+   return ac_lower_subgroups_64bit(b, intrin);
+   else
+   return NULL;
case nir_intrinsic_vote_ieq:
case nir_intrinsic_vote_feq: {
nir_intrinsic_instr *rfi =
@@ -37,12 +73,18 @@ static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder 
*b, nir_intrinsic_inst
  1, intrin->src[0].ssa->bit_size, NULL);
nir_src_copy(&rfi->src[0], &intrin->src[0], rfi);
rfi->num_components = 1;
-
+   nir_ssa_def *first_lane;
+   if (intrin->src[0].ssa->bit_size == 64) {
+   first_lane = ac_lower_subgroups_64bit(b, rfi);
+   } else {
+   nir_builder_instr_insert(b, &rfi->instr);
+   first_lane = &rfi->dest.ssa;
+   }
nir_ssa_def *is_ne;
if (intrin->intrinsic == 

[Mesa-dev] [PATCH 3/7] ac: lower 64bit subgroup intrinsics

2018-03-08 Thread Daniel Schürmann
---
 src/amd/common/ac_lower_subgroups.c | 50 ++---
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_lower_subgroups.c 
b/src/amd/common/ac_lower_subgroups.c
index d0782b481b..2be48e2ba1 100644
--- a/src/amd/common/ac_lower_subgroups.c
+++ b/src/amd/common/ac_lower_subgroups.c
@@ -26,9 +26,45 @@
 
 #include "ac_nir_to_llvm.h"
 
+static nir_ssa_def *ac_lower_subgroups_64bit(nir_builder *b, 
nir_intrinsic_instr *intrin) {
+   assert(intrin->src[0].ssa->bit_size == 64);
+   nir_ssa_def * x = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
+   nir_ssa_def * y = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);
+   nir_intrinsic_instr *intr_x = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
+   nir_intrinsic_instr *intr_y = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
+   nir_ssa_dest_init(&intr_x->instr, &intr_x->dest, 1, 32, NULL);
+   nir_ssa_dest_init(&intr_y->instr, &intr_y->dest, 1, 32, NULL);
+   intr_x->src[0] = nir_src_for_ssa(x);
+   intr_y->src[0] = nir_src_for_ssa(y);
+   intr_x->const_index[0] = intr_y->const_index[0] = 
intrin->const_index[0];
+   intr_x->const_index[1] = intr_y->const_index[1] = 
intrin->const_index[1];
+   if (intrin->intrinsic == nir_intrinsic_read_invocation ||
+   intrin->intrinsic == nir_intrinsic_shuffle ||
+   intrin->intrinsic == nir_intrinsic_quad_broadcast) {
+   nir_src_copy(&intr_x->src[1], &intrin->src[1], intr_x);
+   nir_src_copy(&intr_y->src[1], &intrin->src[1], intr_y);
+   }
+   intr_x->num_components = 1;
+   intr_y->num_components = 1;
+   nir_builder_instr_insert(b, &intr_x->instr);
+   nir_builder_instr_insert(b, &intr_y->instr);
+   return nir_pack_64_2x32_split(b, &intr_x->dest.ssa, &intr_y->dest.ssa);
+}
+
 static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, 
nir_intrinsic_instr *intrin)
 {
switch(intrin->intrinsic) {
+   case nir_intrinsic_read_invocation:
+   case nir_intrinsic_read_first_invocation:
+   case nir_intrinsic_shuffle:
+   case nir_intrinsic_quad_broadcast:
+   case nir_intrinsic_quad_swap_horizontal:
+   case nir_intrinsic_quad_swap_vertical:
+   case nir_intrinsic_quad_swap_diagonal:
+   if (intrin->src[0].ssa->bit_size == 64)
+   return ac_lower_subgroups_64bit(b, intrin);
+   else
+   return NULL;
case nir_intrinsic_vote_ieq:
case nir_intrinsic_vote_feq: {
nir_intrinsic_instr *rfi =
@@ -37,12 +73,18 @@ static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder 
*b, nir_intrinsic_inst
  1, intrin->src[0].ssa->bit_size, NULL);
nir_src_copy(&rfi->src[0], &intrin->src[0], rfi);
rfi->num_components = 1;
-
+   nir_ssa_def *first_lane;
+   if (intrin->src[0].ssa->bit_size == 64) {
+   first_lane = ac_lower_subgroups_64bit(b, rfi);
+   } else {
+   nir_builder_instr_insert(b, &rfi->instr);
+   first_lane = &rfi->dest.ssa;
+   }
nir_ssa_def *is_ne;
if (intrin->intrinsic == nir_intrinsic_vote_feq)
-   is_ne = nir_fne(b, &rfi->dest.ssa, intrin->src[0].ssa);
+   is_ne = nir_fne(b, first_lane, intrin->src[0].ssa);
else
-   is_ne = nir_ine(b, &rfi->dest.ssa, intrin->src[0].ssa);
+   is_ne = nir_ine(b, first_lane, intrin->src[0].ssa);
 
nir_intrinsic_instr *ballot =
nir_intrinsic_instr_create(b->shader, 
nir_intrinsic_ballot);
@@ -50,7 +92,7 @@ static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, 
nir_intrinsic_inst
  1, 64, NULL);
ballot->src[0] = nir_src_for_ssa(is_ne);
ballot->num_components = 1;
-
+   nir_builder_instr_insert(b, &ballot->instr);
return nir_ieq(b, &ballot->dest.ssa, nir_imm_int64(b, 0));
}
default:
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev