We use EXT both to implement vec_extract for large indices and as a
permute.  In both cases we can use MOVPRFX to handle the case in which
the first input and output can't be tied.

Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf.
Applied as r274515.

Richard


2019-08-15  Richard Sandiford  <richard.sandif...@arm.com>

gcc/
        * config/aarch64/aarch64-sve.md (*vec_extract<mode><Vel>_ext)
        (*aarch64_sve_ext<mode>): Add MOVPRFX alternatives.

gcc/testsuite/
        * gcc.target/aarch64/sve/ext_2.c: Expect a MOVPRFX.
        * gcc.target/aarch64/sve/ext_3.c: New test.

Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md   2019-08-15 09:34:37.293987611 +0100
+++ gcc/config/aarch64/aarch64-sve.md   2019-08-15 09:36:18.953237055 +0100
@@ -1356,16 +1356,19 @@ (define_insn "*vec_extract<mode><Vel>_du
 ;; Extract an element outside the range of DUP.  This pattern requires the
 ;; source and destination to be the same.
 (define_insn "*vec_extract<mode><Vel>_ext"
-  [(set (match_operand:<VEL> 0 "register_operand" "=w")
+  [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
        (vec_select:<VEL>
-         (match_operand:SVE_ALL 1 "register_operand" "0")
+         (match_operand:SVE_ALL 1 "register_operand" "0, w")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
   "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
   {
     operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
     operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
-    return "ext\t%0.b, %0.b, %0.b, #%2";
+    return (which_alternative == 0
+           ? "ext\t%0.b, %0.b, %0.b, #%2"
+           : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
   }
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; -------------------------------------------------------------------------
@@ -4700,17 +4703,20 @@ (define_insn "aarch64_sve_<perm_insn><mo
 ;; Concatenate two vectors and extract a subvector.  Note that the
 ;; immediate (third) operand is the lane index not the byte index.
 (define_insn "*aarch64_sve_ext<mode>"
-  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
-       (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
-                        (match_operand:SVE_ALL 2 "register_operand" "w")
+  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
+       (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0, w")
+                        (match_operand:SVE_ALL 2 "register_operand" "w, w")
                         (match_operand:SI 3 "const_int_operand")]
                        UNSPEC_EXT))]
   "TARGET_SVE
    && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
   {
     operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
-    return "ext\\t%0.b, %0.b, %2.b, #%3";
+    return (which_alternative == 0
+           ? "ext\\t%0.b, %0.b, %2.b, #%3"
+           : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
   }
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; -------------------------------------------------------------------------
Index: gcc/testsuite/gcc.target/aarch64/sve/ext_2.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/sve/ext_2.c        2019-03-08 18:14:29.776994751 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/ext_2.c        2019-08-15 09:36:18.953237055 +0100
@@ -14,5 +14,4 @@ foo (void)
   asm volatile ("" :: "w" (x));
 }
 
-/* { dg-final { scan-assembler {\tmov\tz0\.d, z1\.d\n} } } */
-/* { dg-final { scan-assembler {\text\tz0\.b, z0\.b, z[01]\.b, #4\n} } } */
+/* { dg-final { scan-assembler {\tmovprfx\tz0, z1\n\text\tz0\.b, z0\.b, z1\.b, #4\n} } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/ext_3.c
===================================================================
--- /dev/null   2019-07-30 08:53:31.317691683 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/ext_3.c        2019-08-15 09:36:18.953237055 +0100
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=1024" } */
+
+typedef int vnx4si __attribute__((vector_size (128)));
+
+void
+foo (void)
+{
+  register int x asm ("z0");
+  register vnx4si y asm ("z1");
+
+  asm volatile ("" : "=w" (y));
+  x = y[21];
+  asm volatile ("" :: "w" (x));
+}
+
+/* { dg-final { scan-assembler {\tmovprfx\tz0, z1\n\text\tz0\.b, z0\.b, z1\.b, #84\n} } } */

Reply via email to