This patch adds support for two new Power9 instructions, xxpermr and
vpermr, providing more efficient vector permutation operations on
little-endian configurations. These new instructions are described in
the Power ISA 3.0 document. Selection of the new instructions is
conditioned upon TARGET_P9_VECTOR and !VECTOR_ELT_ORDER_BIG.
The patch has been bootstrapped and tested on powerpc64le-unknown-linux-gnu
and powerpc64-unknown-linux-gnu with no regressions. Is this ok for GCC
7 when stage 1 opens?
(A previous version of this patch was distributed and approved, but
further experience with testing of P9 fusion instructions revealed a
problem with that particular code expansion. So this new revision of
the patch omits the fusion instruction generation pattern.)
Thanks.
gcc/testsuite/ChangeLog:
2016-03-17 Kelvin Nilsen <kel...@gcc.gnu.org>
* gcc.target/powerpc/p9-permute.c: Generalize test to run on
big-endian Power9 in addition to little-endian Power9.
* gcc.target/powerpc/p9-vpermr.c: New test.
gcc/ChangeLog:
2016-03-17 Kelvin Nilsen <kel...@gcc.gnu.org>
* config/rs6000/altivec.md (UNSPEC_VPERMR): New unspec
constant.
(*altivec_vpermr_<mode>_internal): New insn.
* config/rs6000/rs6000.c (rs6000_expand_vector_set): If
!BYTES_BIG_ENDIAN and TARGET_P9_VECTOR, expand using template
that translates into new xxpermr or vpermr instructions.
(altivec_expand_vec_perm_le): If TARGET_P9_VECTOR, expand using
template that translates into new xxpermr or vpermr
instructions.
--
Kelvin Nilsen, Ph.D. kdnil...@linux.vnet.ibm.com
home office: 801-756-4821, cell: 520-991-6727
IBM Linux Technology Center - PPC Toolchain
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md (revision 233539)
+++ gcc/config/rs6000/altivec.md (working copy)
@@ -58,6 +58,7 @@
UNSPEC_VSUM2SWS
UNSPEC_VSUMSWS
UNSPEC_VPERM
+ UNSPEC_VPERMR
UNSPEC_VPERM_UNS
UNSPEC_VRFIN
UNSPEC_VCFUX
@@ -1962,6 +1963,19 @@
[(set_attr "type" "vecperm")
(set_attr "length" "4,4,8")])
+(define_insn "*altivec_vpermr_<mode>_internal"
+ [(set (match_operand:VM 0 "register_operand" "=v,?wo")
+ (unspec:VM [(match_operand:VM 1 "register_operand" "v,0")
+ (match_operand:VM 2 "register_operand" "v,wo")
+ (match_operand:V16QI 3 "register_operand" "v,wo")]
+ UNSPEC_VPERMR))]
+ "TARGET_P9_VECTOR"
+ "@
+ vpermr %0,%1,%2,%3
+ xxpermr %x0,%x2,%x3"
+ [(set_attr "type" "vecperm")
+ (set_attr "length" "4,4")])
+
(define_insn "altivec_vperm_v8hiv16qi"
[(set (match_operand:V16QI 0 "register_operand" "=v,?wo,?&wo")
(unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,0,wo")
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 233539)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -6553,19 +6553,27 @@ rs6000_expand_vector_set (rtx target, rtx val, int
UNSPEC_VPERM);
else
{
- /* Invert selector. We prefer to generate VNAND on P8 so
- that future fusion opportunities can kick in, but must
- generate VNOR elsewhere. */
- rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
- rtx iorx = (TARGET_P8_VECTOR
- ? gen_rtx_IOR (V16QImode, notx, notx)
- : gen_rtx_AND (V16QImode, notx, notx));
- rtx tmp = gen_reg_rtx (V16QImode);
- emit_insn (gen_rtx_SET (tmp, iorx));
-
- /* Permute with operands reversed and adjusted selector. */
- x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
- UNSPEC_VPERM);
+ if (TARGET_P9_VECTOR)
+ x = gen_rtx_UNSPEC (mode,
+ gen_rtvec (3, target, reg,
+ force_reg (V16QImode, x)),
+ UNSPEC_VPERMR);
+ else
+ {
+ /* Invert selector. We prefer to generate VNAND on P8 so
+ that future fusion opportunities can kick in, but must
+ generate VNOR elsewhere. */
+ rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
+ rtx iorx = (TARGET_P8_VECTOR
+ ? gen_rtx_IOR (V16QImode, notx, notx)
+ : gen_rtx_AND (V16QImode, notx, notx));
+ rtx tmp = gen_reg_rtx (V16QImode);
+ emit_insn (gen_rtx_SET (tmp, iorx));
+
+ /* Permute with operands reversed and adjusted selector. */
+ x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
+ UNSPEC_VPERM);
+ }
}
emit_insn (gen_rtx_SET (target, x));
@@ -33421,18 +33429,26 @@ altivec_expand_vec_perm_le (rtx operands[4])
if (!REG_P (target))
tmp = gen_reg_rtx (mode);
- /* Invert the selector with a VNAND if available, else a VNOR.
- The VNAND is preferred for future fusion opportunities. */
- notx = gen_rtx_NOT (V16QImode, sel);
- iorx = (TARGET_P8_VECTOR
- ? gen_rtx_IOR (V16QImode, notx, notx)
- : gen_rtx_AND (V16QImode, notx, notx));
- emit_insn (gen_rtx_SET (norreg, iorx));
+ if (TARGET_P9_VECTOR)
+ {
+ unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
+ UNSPEC_VPERMR);
+ }
+ else
+ {
+ /* Invert the selector with a VNAND if available, else a VNOR.
+ The VNAND is preferred for future fusion opportunities. */
+ notx = gen_rtx_NOT (V16QImode, sel);
+ iorx = (TARGET_P8_VECTOR
+ ? gen_rtx_IOR (V16QImode, notx, notx)
+ : gen_rtx_AND (V16QImode, notx, notx));
+ emit_insn (gen_rtx_SET (norreg, iorx));
+
+ /* Permute with operands reversed and adjusted selector. */
+ unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
+ UNSPEC_VPERM);
+ }
- /* Permute with operands reversed and adjusted selector. */
- unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
- UNSPEC_VPERM);
-
/* Copy into target, possibly by way of a register. */
if (!REG_P (target))
{
Index: gcc/testsuite/gcc.target/powerpc/p9-permute.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/p9-permute.c (revision 233539)
+++ gcc/testsuite/gcc.target/powerpc/p9-permute.c (working copy)
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-do compile { target { powerpc64*-*-* } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } {
"-mcpu=power9" } } */
/* { dg-options "-mcpu=power9 -O2" } */
@@ -16,5 +16,6 @@ permute (vector long long *p, vector long long *q,
return vec_perm (a, b, mask);
}
+/* Expect xxpermr on little-endian, xxperm on big-endian. */
/* { dg-final { scan-assembler "xxperm" } } */
/* { dg-final { scan-assembler-not "vperm" } } */
Index: gcc/testsuite/gcc.target/powerpc/p9-vpermr.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/p9-vpermr.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/p9-vpermr.c (revision 234260)
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } {
"-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+#include <altivec.h>
+
+vector long long
+permute (vector long long *p, vector long long *q, vector unsigned char mask)
+{
+ vector long long a = *p;
+ vector long long b = *q;
+
+ /* Force a, b to be in altivec registers to select vpermr insn. */
+ __asm__ (" # a: %x0, b: %x1" : "+v" (a), "+v" (b));
+
+ return vec_perm (a, b, mask);
+}
+
+/* { dg-final { scan-assembler "vpermr" } } */