This patch removes the unspec used for the vector permute doubleword immediate (vpdi) instruction and replaces it with generic RTX (a vec_select of a vec_concat).
gcc/ChangeLog: * config/s390/s390.md (UNSPEC_VEC_PERMI): Remove constant definition. * config/s390/vector.md (*vpdi1<mode>, *vpdi4<mode>): New pattern definitions. * config/s390/vx-builtins.md (*vec_permi<mode>): Emit generic rtx instead of an unspec. gcc/testsuite/ChangeLog: * gcc.target/s390/zvector/vec-permi.c: Removed. * gcc.target/s390/zvector/vec_permi.c: New test. --- gcc/config/s390/s390.md | 1 - gcc/config/s390/vector.md | 26 ++++++++ gcc/config/s390/vx-builtins.md | 26 +++----- .../gcc.target/s390/zvector/vec-permi.c | 54 --------------- .../gcc.target/s390/zvector/vec_permi.c | 66 +++++++++++++++++++ 5 files changed, 102 insertions(+), 71 deletions(-) delete mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec-permi.c create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec_permi.c diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index d896faee0fb..1b894a926ce 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -166,7 +166,6 @@ (define_c_enum "unspec" [ UNSPEC_VEC_PACK_UNSIGNED_SATURATE_CC UNSPEC_VEC_PACK_UNSIGNED_SATURATE_GENCC UNSPEC_VEC_PERM - UNSPEC_VEC_PERMI UNSPEC_VEC_EXTEND UNSPEC_VEC_STORE_LEN UNSPEC_VEC_STORE_LEN_R diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 7507aec1c8e..6a6370b5275 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -767,6 +767,32 @@ (define_insn "*vec_perm<mode>" "vperm\t%v0,%v1,%v2,%v3" [(set_attr "op_type" "VRR")]) + +; First DW of op1 and second DW of op2 +(define_insn "*vpdi1<mode>" + [(set (match_operand:V_HW_2 0 "register_operand" "=v") + (vec_select:V_HW_2 + (vec_concat:<vec_2x_nelts> + (match_operand:V_HW_2 1 "register_operand" "v") + (match_operand:V_HW_2 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 3)])))] + "TARGET_VX" + "vpdi\t%v0,%v1,%v2,1" + [(set_attr "op_type" "VRR")]) + +; Second DW of op1 and first of op2 +(define_insn "*vpdi4<mode>" + [(set (match_operand:V_HW_2 0 "register_operand" "=v") + 
(vec_select:V_HW_2 + (vec_concat:<vec_2x_nelts> + (match_operand:V_HW_2 1 "register_operand" "v") + (match_operand:V_HW_2 2 "register_operand" "v")) + (parallel [(const_int 1) (const_int 2)])))] + "TARGET_VX" + "vpdi\t%v0,%v1,%v2,4" + [(set_attr "op_type" "VRR")]) + + (define_insn "*vmrhb" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_select:V16QI diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md index 5abe43b9e53..3799e833187 100644 --- a/gcc/config/s390/vx-builtins.md +++ b/gcc/config/s390/vx-builtins.md @@ -403,28 +403,22 @@ (define_insn "vec_zperm<mode>" "vperm\t%v0,%v1,%v2,%v3" [(set_attr "op_type" "VRR")]) +; Incoming op3 is in vec_permi format and will be turned into a +; permute vector consisting of op3 and op4. (define_expand "vec_permi<mode>" - [(set (match_operand:V_HW_64 0 "register_operand" "") - (unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand" "") - (match_operand:V_HW_64 2 "register_operand" "") - (match_operand:QI 3 "const_mask_operand" "")] - UNSPEC_VEC_PERMI))] + [(set (match_operand:V_HW_2 0 "register_operand" "") + (vec_select:V_HW_2 + (vec_concat:<vec_2x_nelts> + (match_operand:V_HW_2 1 "register_operand" "") + (match_operand:V_HW_2 2 "register_operand" "")) + (parallel [(match_operand:QI 3 "const_mask_operand" "") (match_dup 4)])))] "TARGET_VX" { HOST_WIDE_INT val = INTVAL (operands[3]); - operands[3] = GEN_INT ((val & 1) | (val & 2) << 1); + operands[3] = GEN_INT ((val & 2) >> 1); + operands[4] = GEN_INT ((val & 1) + 2); }) -(define_insn "*vec_permi<mode>" - [(set (match_operand:V_HW_64 0 "register_operand" "=v") - (unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand" "v") - (match_operand:V_HW_64 2 "register_operand" "v") - (match_operand:QI 3 "const_mask_operand" "C")] - UNSPEC_VEC_PERMI))] - "TARGET_VX && (UINTVAL (operands[3]) & 10) == 0" - "vpdi\t%v0,%v1,%v2,%b3" - [(set_attr "op_type" "VRR")]) - ; Vector replicate diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c 
b/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c deleted file mode 100644 index c0a852b9703..00000000000 --- a/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c +++ /dev/null @@ -1,54 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O3 -march=z13 -mzarch --save-temps" } */ -/* { dg-do run { target { s390_z13_hw } } } */ - -/* - * The vector intrinsic vec_permi(a, b, c) chooses one of the two eight-byte - * vector elements in each of a and b, depending on the value of c. The valid - * values for c differ from the encoding for the M4 field in assembly and in the - * binary instruction. - * - * selection | c | encoding in assembly - * a[0] b[0] | 0 | 0 - * a[0] b[1] | 1 | 1 - * a[1] b[0] | 2 | 4 - * a[1] b[1] | 3 | 5 - * - * (i.e., indices a[i] b[j] are encoded for c as (i<<1) | j, yet for the - * M4 field as (i<<2) | j. - */ -#include <assert.h> -#include <vecintrin.h> - -typedef unsigned long long uv2di __attribute__((vector_size(16))); - -__attribute__ ((noipa)) static uv2di -do_vec_permi(uv2di a, uv2di b, int c) -{ - switch(c) { - case 0: return vec_permi(a, b, 0); - case 1: return vec_permi(a, b, 1); - case 2: return vec_permi(a, b, 2); - case 3: return vec_permi(a, b, 3); - default: assert(0); - } -} - -/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,1\n} 1 } } */ -/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,4\n} 1 } } */ -/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,5\n} 1 } } */ - -int -main (void) -{ - uv2di a = { 0xa0, 0xa1 }; - uv2di b = { 0xb0, 0xb1 }; - - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) { - uv2di res = do_vec_permi(a, b, (i<<1)|j); - assert(res[0] == a[i]); - assert(res[1] == b[j]); - } -} diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec_permi.c b/gcc/testsuite/gcc.target/s390/zvector/vec_permi.c new file mode 100644 index 00000000000..b66fa905dd0 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/s390/zvector/vec_permi.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mzarch -march=z13 -mzvector --save-temps" } */ +/* { dg-do run { target { s390_z13_hw } } } */ + +/* + * The vector intrinsic vec_permi(a, b, c) chooses one of the two eight-byte + * vector elements in each of a and b, depending on the value of c. The valid + * values for c differ from the encoding for the M4 field in assembly and in the + * binary instruction. + * + * selection | c | encoding in assembly + * a[0] b[0] | 0 | 0 -> vmrhg + * a[0] b[1] | 1 | 1 + * a[1] b[0] | 2 | 4 + * a[1] b[1] | 3 | 5 -> vmrlg + * + * (i.e., indices a[i] b[j] are encoded for c as (i<<1) | j, yet for the + * M4 field as (i<<2) | j). + */ + +/* { dg-final { scan-assembler-times "\tvmrhg\t" 3 } } */ +/* { dg-final { scan-assembler-times "\tvmrlg\t" 3 } } */ +/* { dg-final { scan-assembler-times "\tvpdi\t" 6 } } */ + +#include "vec-types.h" +#include <vecintrin.h> + +#define GEN_PERMI_BITS(VEC_TYPE, BITS) \ + VEC_TYPE __attribute__((noinline)) \ + permi_##BITS##_##VEC_TYPE(VEC_TYPE a, VEC_TYPE b) { \ + return vec_permi (a, b, (BITS)); } + +#define GEN_PERMI(VEC_TYPE) \ + GEN_PERMI_BITS(VEC_TYPE, 0); \ + GEN_PERMI_BITS(VEC_TYPE, 1); \ + GEN_PERMI_BITS(VEC_TYPE, 2); \ + GEN_PERMI_BITS(VEC_TYPE, 3); + +GEN_PERMI(v2di) +GEN_PERMI(uv2di) +GEN_PERMI(v2df) + + +#define CHECK_PERMI_BITS(VEC_TYPE, BITS) \ + VEC_TYPE r##BITS = permi_##BITS##_##VEC_TYPE (a, b); \ + if (r##BITS[0] != ((BITS) & 2) >> 1 \ + || r##BITS[1] != ((BITS) & 1) + 2) \ + __builtin_abort(); + +#define CHECK_PERMI(VEC_TYPE) \ + { \ + VEC_TYPE a = GEN_SEQ_VEC (VEC_TYPE, 0); \ + VEC_TYPE b = GEN_SEQ_VEC (VEC_TYPE, 2); \ + CHECK_PERMI_BITS (VEC_TYPE, 0); \ + CHECK_PERMI_BITS (VEC_TYPE, 1); \ + CHECK_PERMI_BITS (VEC_TYPE, 2); \ + CHECK_PERMI_BITS (VEC_TYPE, 3); \ + } + +int +main () +{ + CHECK_PERMI (v2di); + CHECK_PERMI (uv2di); + CHECK_PERMI (v2df); +} -- 2.31.1