[PATCH v1] LoongArch: Modify the redundant instructions that use vector builtin functions as parameters.

chenxiaolong Tue, 18 Nov 2025 17:12:48 -0800

Analysis of the test cases shows that, during the combine pass, the backend
does not support a subreg from E_V4SFmode to E_V8SFmode.  As a result, the
computed rtx_cost is relatively large and the combination is not performed.


For example:

(insn 7 4 9 2 (set (reg:V8SF 82 [ _6 ])
        (subreg:V8SF (reg:V4SF 86 [ aD.7906 ]) 0))
(insn 9 7 10 2 (set (reg:V8SF 80 [ _4 ])
        (plus:V8SF (reg:V8SF 82 [ _6 ])
            (reg:V8SF 82 [ _6 ])))

Replaced by
===>
(insn 9 7 10 2 (set (reg:V8SF 80 [ _4 ])
        (plus:V8SF (subreg:V8SF (reg:V4SF 86 [ aD.7906 ]) 0)
            (subreg:V8SF (reg:V4SF 86 [ aD.7906 ]) 0)))

test.c

    #include <lasxintrin.h>
    __m256 foo2(__m128 a)
    {
      return __lasx_xvfadd_s (__lasx_cast_128_s (a), __lasx_cast_128_s (a));
    }

Before asm:
foo2:
        addi.d  $r3,$r3,-32
        vinsgr2vr.d     $vr0,$r5,0
        vinsgr2vr.d     $vr0,$r6,1
        vst     $vr0,$r3,0
        xvld    $xr0,$r3,0
        xvfadd.s        $xr0,$xr0,$xr0
        xvst    $xr0,$r4,0
        addi.d  $r3,$r3,32
        jr      $r1

After asm:
foo2:
        vinsgr2vr.d     $vr0,$r5,0
        vinsgr2vr.d     $vr0,$r6,1
        xvfadd.s        $xr0,$xr0,$xr0
        xvst    $xr0,$r4,0
        jr      $r1

gcc/ChangeLog:

        * config/loongarch/loongarch.cc (loongarch_modes_tieable_p):
        Allow two MODE_VECTOR_INT modes, or two MODE_VECTOR_FLOAT modes,
        to be tied.

gcc/testsuite/ChangeLog:

        * gcc.target/loongarch/vector/lasx/vect-extract-256-128.c:
        Expect xvld instead of vld.
        * gcc.target/loongarch/vect-mode-tieable.c: New test.

Change-Id: I6eea6b6802a8cd04d1690684d949604022f91430
---
 gcc/config/loongarch/loongarch.cc             |  6 ++-
 .../gcc.target/loongarch/vect-mode-tieable.c  | 47 +++++++++++++++++++
 .../vector/lasx/vect-extract-256-128.c        |  8 ++--
 3 files changed, 56 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index fcca0ec8252..fbba6cc9e25 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -7286,7 +7286,11 @@ loongarch_modes_tieable_p (machine_mode mode1, machine_mode mode2)
          || (GET_MODE_CLASS(mode1) == MODE_FLOAT
              && GET_MODE_CLASS(mode2) == MODE_INT)
          || (GET_MODE_CLASS(mode2) == MODE_FLOAT
-             && GET_MODE_CLASS(mode1) == MODE_INT));
+             && GET_MODE_CLASS (mode1) == MODE_INT)
+         || (GET_MODE_CLASS (mode1) == MODE_VECTOR_INT
+             && GET_MODE_CLASS (mode2) == MODE_VECTOR_INT)
+         || (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
+             &&  GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT));
 }
 
 /* Implement TARGET_PREFERRED_RELOAD_CLASS.  */
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c b/gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c
new file mode 100644
index 00000000000..d156f92761d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c
@@ -0,0 +1,47 @@
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-mabi=lp64d -O2 -mlasx" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <lasxintrin.h>
+
+/*
+**foo1:
+**     vinsgr2vr.d     (\$vr[0-9]+),\$r5,0
+**     vinsgr2vr.d     (\$vr[0-9]+),\$r6,1
+**     xvadd.d (\$xr[0-9]+),(\$xr[0-9]+),(\$xr[0-9]+)
+**     xvst    (\$xr[0-9]+),\$r4,0
+**     jr      \$r1
+*/
+__m256i
+foo1 (__m128i a)
+{
+  return __lasx_xvadd_d (__lasx_cast_128 (a), __lasx_cast_128 (a));
+}
+
+/*
+**foo2:
+**     vinsgr2vr.d     (\$vr[0-9]+),\$r5,0
+**     vinsgr2vr.d     (\$vr[0-9]+),\$r6,1
+**     xvfadd.s        (\$xr[0-9]+),(\$xr[0-9]+),(\$xr[0-9]+)
+**     xvst    (\$xr[0-9]+),\$r4,0
+**     jr      \$r1
+*/
+__m256
+foo2 (__m128 a)
+{
+  return __lasx_xvfadd_s (__lasx_cast_128_s (a), __lasx_cast_128_s (a));
+}
+
+/*
+**foo3:
+**     vinsgr2vr.d     (\$vr[0-9]+),\$r5,0
+**     vinsgr2vr.d     (\$vr[0-9]+),\$r6,1
+**     xvfadd.d        (\$xr[0-9]+),(\$xr[0-9]+),(\$xr[0-9]+)
+**     xvst    (\$xr[0-9]+),\$r4,0
+**     jr      \$r1
+*/
+__m256d
+foo3 (__m128d a)
+{
+  return __lasx_xvfadd_d (__lasx_cast_128_d (a), __lasx_cast_128_d (a));
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
index d2219ea82de..0c669b0ecc1 100644
--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
@@ -6,7 +6,7 @@
 
 /*
 **foo1_lo:
-**     vld     (\$vr[0-9]+),\$r4,0
+**     xvld    (\$xr[0-9]+),\$r4,0
 **     vpickve2gr.du   \$r4,(\$vr[0-9]+),0
 **     vpickve2gr.du   \$r5,(\$vr[0-9]+),1
 **     jr      \$r1
@@ -33,7 +33,7 @@ foo1_hi (__m256 x)
 
 /*
 **foo2_lo:
-**     vld     (\$vr[0-9]+),\$r4,0
+**     xvld    (\$xr[0-9]+),\$r4,0
 **     vpickve2gr.du   \$r4,(\$vr[0-9]+),0
 **     vpickve2gr.du   \$r5,(\$vr[0-9]+),1
 **     jr      \$r1
@@ -51,7 +51,7 @@ foo2_lo (__m256d x)
 **     vpickve2gr.du   \$r4,(\$vr[0-9]+),0
 **     vpickve2gr.du   \$r5,(\$vr[0-9]+),1
 **     jr      \$r1
-*/    
+*/
 __m128d
 foo2_hi (__m256d x)
 {
@@ -60,7 +60,7 @@ foo2_hi (__m256d x)
 
 /*
 **foo3_lo:
-**     vld     (\$vr[0-9]+),\$r4,0
+**     xvld    (\$xr[0-9]+),\$r4,0
 **     vpickve2gr.du   \$r4,(\$vr[0-9]+),0
 **     vpickve2gr.du   \$r5,(\$vr[0-9]+),1
 **     jr      \$r1
-- 
2.20.1

[PATCH v1] LoongArch: Modify the redundant instructions that use vector builtin functions as parameters.

Reply via email to