https://gcc.gnu.org/g:dddb16c8170d2a92f105817eb00cf04bfaf54bd5

commit r16-4952-gdddb16c8170d2a92f105817eb00cf04bfaf54bd5
Author: Guo Jie <[email protected]>
Date:   Sun Nov 2 11:31:32 2025 +0800

    LoongArch: Eliminate unnecessary dependencies introduced by xvpermi.q
    
    1. When the selector is 0x0, 0x1, 0x10, or 0x11, the result of
    xvpermi.q does not depend on the previous value of the output
    operand.  Splitting these cases into xvpermi.d or a plain move
    removes the dependency on the output operand as an input, which
    can reduce the number of instructions.
    
    2. When the selector is 0x22, 0x23, 0x32, or 0x33, the result of
    xvpermi.q does not depend on the second input operand.  Splitting
    these cases the same way removes the dependency on the second
    input operand, which can also reduce the number of instructions.
    
    gcc/ChangeLog:
    
            * config/loongarch/lasx.md (lasx_xvpermi_q_<LASX:mode>):
            Add new splitter for optimization.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/loongarch/vec_pack_unpack_256.c: Adjust to changed
            lasx_xvpermi_q_<LASX:mode> template.
            * gcc.target/loongarch/vector/lasx/lasx-builtin.c: Ditto.
            * gcc.target/loongarch/lasx-xvpermi_q-opt.c: New test.

Diff:
---
 gcc/config/loongarch/lasx.md                       | 33 +++++++++++++++-
 .../gcc.target/loongarch/lasx-xvpermi_q-opt.c      | 44 ++++++++++++++++++++++
 .../gcc.target/loongarch/vec_pack_unpack_256.c     | 18 ++++++---
 .../loongarch/vector/lasx/lasx-builtin.c           |  2 +-
 4 files changed, 89 insertions(+), 8 deletions(-)

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 7704f8c798e5..71dd25d0b5a2 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -515,7 +515,7 @@
    (set_attr "mode" "<MODE>")])
 
 ;; xvpermi.q
-(define_insn "lasx_xvpermi_q_<LASX:mode>"
+(define_insn_and_split "lasx_xvpermi_q_<LASX:mode>"
   [(set (match_operand:LASX 0 "register_operand" "=f")
        (unspec:LASX
          [(match_operand:LASX 1 "register_operand" "0")
@@ -525,6 +525,37 @@
   "ISA_HAS_LASX"
 {
   return "xvpermi.q\t%u0,%u2,%3";
+}
+  "&& ((INTVAL (operands[3]) & 0xee) == 0x0
+       || (INTVAL (operands[3]) & 0xee) == 0x22)"
+  [(const_int 0)]
+{
+  HOST_WIDE_INT selector = INTVAL (operands[3]);
+  /* Reduce the dependency caused by using output operands[0] as input.  */
+  switch (INTVAL (operands[3]))
+    {
+    case 0x22:
+    case 0x23:
+    case 0x33:
+      selector -= 0x22;
+      operands[2] = operands[1];
+    /* FALLTHRU.  */
+    case 0x0:
+    case 0x1:
+    case 0x11:
+      emit_insn (gen_lasx_xvpermi_d_<mode> (operands[0], operands[2],
+                                           GEN_INT (selector * 0xa + 0x44)));
+      break;
+    case 0x10:
+      emit_move_insn (operands[0], operands[2]);
+      break;
+    case 0x32:
+      emit_move_insn (operands[0], operands[1]);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  DONE;
 }
   [(set_attr "type" "simd_splat")
    (set_attr "mode" "<MODE>")])
diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-xvpermi_q-opt.c b/gcc/testsuite/gcc.target/loongarch/lasx-xvpermi_q-opt.c
new file mode 100644
index 000000000000..16fb9dfecdcc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/lasx-xvpermi_q-opt.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx -ftree-vectorize" } */
+
+#include <lasxintrin.h>
+
+#define TEST_FUNC(imm)                                                        \
+  __m256i                                                                     \
+  test_##imm (__m256i op0, __m256i op1)                                       \
+  {                                                                           \
+    return __lasx_xvpermi_q (op0, op1, imm);                                  \
+  }
+
+TEST_FUNC (0x00)
+/* { dg-final { scan-assembler-not "test_0x00:.*\txvld.*xvld.*-test_0x00"} } */
+/* { dg-final { scan-assembler-times "test_0x00:.*\txvpermi\\.d.*-test_0x00" 1 } } */
+
+TEST_FUNC (0x01)
+/* { dg-final { scan-assembler-not "test_0x01:.*\txvld.*xvld.*-test_0x01"} } */
+/* { dg-final { scan-assembler-times "test_0x01:.*\txvpermi\\.d.*-test_0x01" 1 } } */
+
+TEST_FUNC (0x10)
+/* { dg-final { scan-assembler-not "test_0x10:.*\txvld.*xvld.*-test_0x10"} } */
+/* { dg-final { scan-assembler-not "test_0x10:.*\txvpermi.*-test_0x10"} } */
+
+TEST_FUNC (0x11)
+/* { dg-final { scan-assembler-not "test_0x11:.*\txvld.*xvld.*-test_0x11"} } */
+/* { dg-final { scan-assembler-times "test_0x11:.*\txvpermi\\.d.*-test_0x11" 1 } } */
+
+TEST_FUNC (0x22)
+/* { dg-final { scan-assembler-not "test_0x22:.*\txvld.*xvld.*-test_0x22"} } */
+/* { dg-final { scan-assembler-times "test_0x22:.*\txvpermi\\.d.*-test_0x22" 1 } } */
+
+TEST_FUNC (0x23)
+/* { dg-final { scan-assembler-not "test_0x23:.*\txvld.*xvld.*-test_0x23"} } */
+/* { dg-final { scan-assembler-times "test_0x23:.*\txvpermi\\.d.*-test_0x23" 1 } } */
+
+TEST_FUNC (0x32)
+/* { dg-final { scan-assembler-not "test_0x32:.*\txvld.*xvld.*-test_0x32"} } */
+/* { dg-final { scan-assembler-not "test_0x32:.*\txvpermi.*-test_0x32"} } */
+
+TEST_FUNC (0x33)
+/* { dg-final { scan-assembler-not "test_0x33:.*\txvld.*xvld.*-test_0x33"} } */
+/* { dg-final { scan-assembler-times "test_0x33:.*\txvpermi\\.d.*-test_0x33" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_256.c b/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_256.c
index 506b7bdb03e9..5b2fd9b0599f 100644
--- a/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_256.c
+++ b/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_256.c
@@ -55,7 +55,8 @@ test_vec_unpacks_float_hi_lo_v8si (void)
 }
 
 /* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\tvext2xv\\.d\\.w.*-test_vec_unpacks_hi_lo_v8si" } } */
-/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v8si" } } */
+/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v8si" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v8si" } } */
 void
 test_vec_unpacks_hi_lo_v8si (void)
 {
@@ -64,7 +65,8 @@ test_vec_unpacks_hi_lo_v8si (void)
 }
 
 /* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\tvext2xv\\.w\\.h.*-test_vec_unpacks_hi_lo_v16hi" } } */
-/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v16hi" } } */
+/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v16hi" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v16hi" } } */
 void
 test_vec_unpacks_hi_lo_v16hi (void)
 {
@@ -73,7 +75,8 @@ test_vec_unpacks_hi_lo_v16hi (void)
 }
 
 /* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\tvext2xv\\.h\\.b.*-test_vec_unpacks_hi_lo_v32qi" } } */
-/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v32qi" } } */
+/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v32qi" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v32qi" } } */
 void
 test_vec_unpacks_hi_lo_v32qi (void)
 {
@@ -91,7 +94,8 @@ test_vec_unpacks_hi_lo_v8sf (void)
 }
 
 /* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\tvext2xv\\.du\\.wu.*-test_vec_unpacku_hi_lo_v8si" } } */
-/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v8si" } } */
+/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.d.*-test_vec_unpacku_hi_lo_v8si" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v8si" } } */
 void
 test_vec_unpacku_hi_lo_v8si (void)
 {
@@ -100,7 +104,8 @@ test_vec_unpacku_hi_lo_v8si (void)
 }
 
 /* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\tvext2xv\\.wu\\.hu.*-test_vec_unpacku_hi_lo_v16hi" } } */
-/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v16hi" } } */
+/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.d.*-test_vec_unpacku_hi_lo_v16hi" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v16hi" } } */
 void
 test_vec_unpacku_hi_lo_v16hi (void)
 {
@@ -109,7 +114,8 @@ test_vec_unpacku_hi_lo_v16hi (void)
 }
 
 /* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\tvext2xv\\.hu\\.bu.*-test_vec_unpacku_hi_lo_v32qi" } } */
-/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v32qi" } } */
+/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.d.*-test_vec_unpacku_hi_lo_v32qi" } } */
+/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v32qi" } } */
 void
 test_vec_unpacku_hi_lo_v32qi (void)
 {
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c
index 64ff870a4c57..3f34a430c4e7 100644
--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c
@@ -3301,7 +3301,7 @@ __lasx_vext2xv_du_bu (v32i8 _1)
 v32i8
 __lasx_xvpermi_q (v32i8 _1, v32i8 _2)
 {
-  return __builtin_lasx_xvpermi_q (_1, _2, 1);
+  return __builtin_lasx_xvpermi_q (_1, _2, 0x20);
 }
 v4i64
 __lasx_xvpermi_d (v4i64 _1)

Reply via email to