From: Richard Henderson <r...@twiddle.net>

---
 gcc/config/rs6000/altivec.md                      |   13 ++
 gcc/config/rs6000/rs6000-protos.h                 |    1 +
 gcc/config/rs6000/rs6000.c                        |  175 +++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c |   76 +++++++++
 gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c |   19 +++
 gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c |   13 ++
 6 files changed, 297 insertions(+), 0 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index a3a8d77..7797b65 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1366,6 +1366,19 @@
   "TARGET_ALTIVEC"
   "")
 
+(define_expand "vec_perm_constv16qi"
+  [(match_operand:V16QI 0 "register_operand" "")      ; result
+   (match_operand:V16QI 1 "register_operand" "")      ; first input vector
+   (match_operand:V16QI 2 "register_operand" "")      ; second input vector
+   (match_operand:V16QI 3 "" "")]                     ; selector (no predicate)
+  "TARGET_ALTIVEC"
+{
+  if (altivec_expand_vec_perm_const (operands))
+    DONE;
+  else
+    FAIL;
+})
+
 (define_insn "altivec_vrfip"           ; ceil
   [(set (match_operand:V4SF 0 "register_operand" "=v")
         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")]
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 4650152..f2ed084 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -55,6 +55,7 @@ extern void rs6000_expand_vector_init (rtx, rtx);
 extern void paired_expand_vector_init (rtx, rtx);
 extern void rs6000_expand_vector_set (rtx, rtx, int);
 extern void rs6000_expand_vector_extract (rtx, rtx, int);
+extern bool altivec_expand_vec_perm_const (rtx op[4]);
 extern void build_mask64_2_operands (rtx, rtx *);
 extern int expand_block_clear (rtx[]);
 extern int expand_block_move (rtx[]);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 46ad820..9be155d 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -26202,6 +26202,181 @@ rs6000_emit_parity (rtx dst, rtx src)
     }
 }
 
+/* Expand an Altivec constant permute; OPERANDS = target, op0, op1, selector.
+   Return true on a splat, merge or pack match; false to fall back to VPERM.  */
+
+bool
+altivec_expand_vec_perm_const (rtx operands[4])
+{
+  struct altivec_perm_insn {
+    enum insn_code impl;  /* Insn to emit.  */
+    unsigned char perm[16];  /* Byte selector that insn implements.  */
+  };
+  static const struct altivec_perm_insn patterns[] = {
+    { CODE_FOR_altivec_vpkuhum,
+      {  1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
+    { CODE_FOR_altivec_vpkuwum,
+      {  2,  3,  6,  7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
+    { CODE_FOR_altivec_vmrghb,
+      {  0, 16,  1, 17,  2, 18,  3, 19,  4, 20,  5, 21,  6, 22,  7, 23 } },
+    { CODE_FOR_altivec_vmrghh,
+      {  0,  1, 16, 17,  2,  3, 18, 19,  4,  5, 20, 21,  6,  7, 22, 23 } },
+    { CODE_FOR_altivec_vmrghw,
+      {  0,  1,  2,  3, 16, 17, 18, 19,  4,  5,  6,  7, 20, 21, 22, 23 } },
+    { CODE_FOR_altivec_vmrglb,
+      {  8, 24,  9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
+    { CODE_FOR_altivec_vmrglh,
+      {  8,  9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
+    { CODE_FOR_altivec_vmrglw,
+      {  8,  9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }
+  };
+
+  unsigned int i, j, elt, which;
+  unsigned char perm[16];
+  rtx target, op0, op1, sel, x;
+  bool one_vec;
+
+  target = operands[0];
+  op0 = operands[1];
+  op1 = operands[2];
+  sel = operands[3];
+
+  /* Unpack the constant selector; WHICH bit 0 = op0 used, bit 1 = op1 used.  */
+  for (i = which = 0; i < 16; ++i)
+    {
+      rtx e = XVECEXP (sel, 0, i);
+      elt = INTVAL (e) & 31;  /* Indices wrap modulo 32.  */
+      which |= (elt < 16 ? 1 : 2);
+      perm[i] = elt;
+    }
+
+  /* Simplify the constant selector based on which operands it references.  */
+  switch (which)
+    {
+    default:
+      gcc_unreachable ();  /* WHICH is nonzero after 16 elements.  */
+
+    case 3:  /* Both operands referenced.  */
+      one_vec = false;
+      if (!rtx_equal_p (op0, op1))
+       break;  /* Two distinct inputs.  */
+
+      /* Fold the permutation into a single vector.  */
+      for (i = 0; i < 16; ++i)
+       if (perm[i] >= 16)
+         perm[i] -= 16;
+      /* FALLTHRU */
+
+    case 1:  /* Only op0 referenced.  */
+      op1 = op0;
+      one_vec = true;
+      break;
+
+    case 2:  /* Only op1 referenced; rebase the indices to 0..15.  */
+      for (i = 0; i < 16; ++i)
+       perm[i] -= 16;
+      op0 = op1;
+      one_vec = true;
+      break;
+    }
+ 
+  /* Look for splat patterns: byte, then halfword, then word.  */
+  if (one_vec)
+    {
+      elt = perm[0];
+
+      for (i = 0; i < 16; ++i)
+       if (perm[i] != elt)
+         break;
+      if (i == 16)
+       {
+         emit_insn (gen_altivec_vspltb (target, op0, GEN_INT (elt)));
+         return true;
+       }
+
+      if (elt % 2 == 0)  /* Starts on a halfword boundary.  */
+       {
+         for (i = 0; i < 16; i += 2)
+           if (perm[i] != elt || perm[i + 1] != elt + 1)
+             break;
+         if (i == 16)
+           {
+             x = gen_reg_rtx (V8HImode);
+             emit_insn (gen_altivec_vsplth (x, gen_lowpart (V8HImode, op0),
+                                            GEN_INT (elt / 2)));
+             emit_move_insn (target, gen_lowpart (V16QImode, x));
+             return true;
+           }
+       }
+
+      if (elt % 4 == 0)  /* Starts on a word boundary.  */
+       {
+         for (i = 0; i < 16; i += 4)
+           if (perm[i] != elt
+               || perm[i + 1] != elt + 1
+               || perm[i + 2] != elt + 2
+               || perm[i + 3] != elt + 3)
+             break;
+         if (i == 16)
+           {
+             x = gen_reg_rtx (V4SImode);
+             emit_insn (gen_altivec_vspltw (x, gen_lowpart (V4SImode, op0),
+                                            GEN_INT (elt / 4)));
+             emit_move_insn (target, gen_lowpart (V16QImode, x));
+             return true;
+           }
+       }
+    }
+
+  /* Look for merge and pack patterns, with the inputs in either order.  */
+  for (j = 0; j < ARRAY_SIZE (patterns); ++j)
+    {
+      bool swapped;
+
+      elt = patterns[j].perm[0];
+      if (perm[0] == elt)
+       swapped = false;
+      else if (perm[0] == elt + 16)
+       swapped = true;  /* Try the pattern with op0/op1 exchanged.  */
+      else
+       continue;
+      for (i = 1; i < 16; ++i)
+       {
+         elt = patterns[j].perm[i];
+         if (swapped)
+           elt = (elt >= 16 ? elt - 16 : elt + 16);
+         else if (one_vec && elt >= 16)  /* op1 is op0: fold the index.  */
+           elt -= 16;
+         if (perm[i] != elt)
+           break;
+       }
+      if (i == 16)
+       {
+         enum insn_code icode = patterns[j].impl;
+         enum machine_mode omode = insn_data[icode].operand[0].mode;
+         enum machine_mode imode = insn_data[icode].operand[1].mode;
+
+         if (swapped)
+           x = op0, op0 = op1, op1 = x;
+         if (imode != V16QImode)  /* View inputs in the insn's mode.  */
+           {
+             op0 = gen_lowpart (imode, op0);
+             op1 = gen_lowpart (imode, op1);
+           }
+         if (omode == V16QImode)
+           x = target;
+         else
+           x = gen_reg_rtx (omode);
+         emit_insn (GEN_FCN (icode) (x, op0, op1));
+         if (omode != V16QImode)  /* Copy the result back as V16QI.  */
+           emit_move_insn (target, gen_lowpart (V16QImode, x));
+         return true;
+       }
+    }
+
+  return false;
+}
+
 /* Return an RTX representing where to find the function value of a
    function returning MODE.  */
 static rtx
diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c b/gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c
new file mode 100644
index 0000000..ee5c5ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O -maltivec -mno-vsx" } */
+
+typedef unsigned char V __attribute__((vector_size(16)));
+
+V b1(V x)  /* Byte splat of element 1: expect vspltb.  */
+{
+  return __builtin_shuffle(x, (V){ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, });
+}
+
+V b2(V x)  /* Halfword splat (bytes 2-3): expect vsplth.  */
+{
+  return __builtin_shuffle(x, (V){ 2,3,2,3, 2,3,2,3, 2,3,2,3, 2,3,2,3, });
+}
+
+V b4(V x)  /* Word splat (bytes 4-7): expect vspltw.  */
+{
+  return __builtin_shuffle(x, (V){ 4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7, });
+}
+
+V p2(V x, V y)  /* The vpkuhum selector.  */
+{
+  return __builtin_shuffle(x, y,
+       (V){ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+
+}
+
+V p4(V x, V y)  /* The vpkuwum selector.  */
+{
+  return __builtin_shuffle(x, y,
+       (V){ 2,  3,  6,  7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 });
+}
+
+V h1(V x, V y)  /* The vmrghb selector.  */
+{
+  return __builtin_shuffle(x, y,
+       (V){ 0, 16,  1, 17,  2, 18,  3, 19,  4, 20,  5, 21,  6, 22,  7, 23 });
+}
+
+V h2(V x, V y)  /* The vmrghh selector.  */
+{
+  return __builtin_shuffle(x, y,
+       (V){ 0,  1, 16, 17,  2,  3, 18, 19,  4,  5, 20, 21,  6,  7, 22, 23 });
+}
+
+V h4(V x, V y)  /* The vmrghw selector.  */
+{
+  return __builtin_shuffle(x, y,
+       (V){ 0,  1,  2,  3, 16, 17, 18, 19,  4,  5,  6,  7, 20, 21, 22, 23 });
+}
+
+V l1(V x, V y)  /* The vmrglb selector.  */
+{
+  return __builtin_shuffle(x, y,
+       (V){ 8, 24,  9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+}
+
+V l2(V x, V y)  /* The vmrglh selector.  */
+{
+  return __builtin_shuffle(x, y,
+       (V){ 8,  9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 });
+}
+
+V l4(V x, V y)  /* The vmrglw selector.  */
+{
+  return __builtin_shuffle(x, y,
+       (V){ 8,  9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 });
+}
+
+/* { dg-final { scan-assembler-not "vperm" } } */
+/* { dg-final { scan-assembler "vspltb" } } */
+/* { dg-final { scan-assembler "vsplth" } } */
+/* { dg-final { scan-assembler "vspltw" } } */
+/* { dg-final { scan-assembler "vpkuhum" } } */
+/* { dg-final { scan-assembler "vpkuwum" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c b/gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c
new file mode 100644
index 0000000..1b90bb9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O -maltivec -mno-vsx" } */
+
+typedef unsigned short V __attribute__((vector_size(16)));
+
+V f2(V x)  /* Splat of halfword element 1; presumably the vsplth case.  */
+{
+  return __builtin_shuffle(x, (V){ 1,1,1,1, 1,1,1,1, });
+}
+
+V f4(V x)  /* Repeats halfwords 2-3; presumably the vspltw case.  */
+{
+  return __builtin_shuffle(x, (V){ 2,3,2,3, 2,3,2,3, });
+}
+
+/* { dg-final { scan-assembler-not "vperm" } } */
+/* { dg-final { scan-assembler "vsplth" } } */
+/* { dg-final { scan-assembler "vspltw" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c b/gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c
new file mode 100644
index 0000000..9598edf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O -maltivec -mno-vsx" } */
+
+typedef unsigned int V __attribute__((vector_size(16)));
+
+V f4(V x)  /* Splat of word element 1; presumably the vspltw case.  */
+{
+  return __builtin_shuffle(x, (V){ 1,1,1,1, });
+}
+
+/* { dg-final { scan-assembler-not "vperm" } } */
+/* { dg-final { scan-assembler "vspltw" } } */
-- 
1.7.7.3

Reply via email to