Analysis of the test cases shows that, during the combine pass, the backend
does not support the subreg operation from E_V4SFmode to E_V8SFmode.  As a
result, the calculated rtx_cost is relatively large and the combine cannot
be performed.
For example:
(insn 7 4 9 2 (set (reg:V8SF 82 [ _6 ])
(subreg:V8SF (reg:V4SF 86 [ aD.7906 ]) 0))
(insn 9 7 10 2 (set (reg:V8SF 80 [ _4 ])
(plus:V8SF (reg:V8SF 82 [ _6 ])
(reg:V8SF 82 [ _6 ])))
Replaced by
===>
(insn 9 7 10 2 (set (reg:V8SF 80 [ _4 ])
(plus:V8SF (subreg:V8SF (reg:V4SF 86 [ aD.7906 ]) 0)
(subreg:V8SF (reg:V4SF 86 [ aD.7906 ]) 0)))
test.c
#include <lasxintrin.h>
__m256 foo2(__m128 a)
{
return __lasx_xvfadd_s (__lasx_cast_128_s (a), __lasx_cast_128_s (a));
}
Before asm:
foo2:
addi.d $r3,$r3,-32
vinsgr2vr.d $vr0,$r5,0
vinsgr2vr.d $vr0,$r6,1
vst $vr0,$r3,0
xvld $xr0,$r3,0
xvfadd.s $xr0,$xr0,$xr0
xvst $xr0,$r4,0
addi.d $r3,$r3,32
jr $r1
After asm:
foo2:
vinsgr2vr.d $vr0,$r5,0
vinsgr2vr.d $vr0,$r6,1
xvfadd.s $xr0,$xr0,$xr0
xvst $xr0,$r4,0
jr $r1
gcc/ChangeLog:
* config/loongarch/loongarch.cc (loongarch_modes_tieable_p):
Treat two vector modes of the same class (integer or float) as tieable.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/vector/lasx/vect-extract-256-128.c:
Modify vld to xvld.
* gcc.target/loongarch/vect-mode-tieable.c: New test.
Change-Id: I6eea6b6802a8cd04d1690684d949604022f91430
---
gcc/config/loongarch/loongarch.cc | 6 ++-
.../gcc.target/loongarch/vect-mode-tieable.c | 47 +++++++++++++++++++
.../vector/lasx/vect-extract-256-128.c | 8 ++--
3 files changed, 56 insertions(+), 5 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c
diff --git a/gcc/config/loongarch/loongarch.cc
b/gcc/config/loongarch/loongarch.cc
index fcca0ec8252..fbba6cc9e25 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -7286,7 +7286,11 @@ loongarch_modes_tieable_p (machine_mode mode1,
machine_mode mode2)
|| (GET_MODE_CLASS(mode1) == MODE_FLOAT
&& GET_MODE_CLASS(mode2) == MODE_INT)
|| (GET_MODE_CLASS(mode2) == MODE_FLOAT
- && GET_MODE_CLASS(mode1) == MODE_INT));
+ && GET_MODE_CLASS (mode1) == MODE_INT)
+ || (GET_MODE_CLASS (mode1) == MODE_VECTOR_INT
+ && GET_MODE_CLASS (mode2) == MODE_VECTOR_INT)
+ || (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
+ && GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT));
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS. */
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c
b/gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c
new file mode 100644
index 00000000000..d156f92761d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c
@@ -0,0 +1,47 @@
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-mabi=lp64d -O2 -mlasx" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <lasxintrin.h>
+
+/*
+**foo1:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** xvadd.d (\$xr[0-9]+),(\$xr[0-9]+),(\$xr[0-9]+)
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256i
+foo1 (__m128i a)
+{
+ return __lasx_xvadd_d (__lasx_cast_128 (a), __lasx_cast_128 (a));
+}
+
+/*
+**foo2:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** xvfadd.s (\$xr[0-9]+),(\$xr[0-9]+),(\$xr[0-9]+)
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256
+foo2 (__m128 a)
+{
+ return __lasx_xvfadd_s (__lasx_cast_128_s (a), __lasx_cast_128_s (a));
+}
+
+/*
+**foo3:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** xvfadd.d (\$xr[0-9]+),(\$xr[0-9]+),(\$xr[0-9]+)
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256d
+foo3 (__m128d a)
+{
+ return __lasx_xvfadd_d (__lasx_cast_128_d (a), __lasx_cast_128_d (a));
+}
diff --git
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
index d2219ea82de..0c669b0ecc1 100644
--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
@@ -6,7 +6,7 @@
/*
**foo1_lo:
-** vld (\$vr[0-9]+),\$r4,0
+** xvld (\$xr[0-9]+),\$r4,0
** vpickve2gr.du \$r4,(\$vr[0-9]+),0
** vpickve2gr.du \$r5,(\$vr[0-9]+),1
** jr \$r1
@@ -33,7 +33,7 @@ foo1_hi (__m256 x)
/*
**foo2_lo:
-** vld (\$vr[0-9]+),\$r4,0
+** xvld (\$xr[0-9]+),\$r4,0
** vpickve2gr.du \$r4,(\$vr[0-9]+),0
** vpickve2gr.du \$r5,(\$vr[0-9]+),1
** jr \$r1
@@ -51,7 +51,7 @@ foo2_lo (__m256d x)
** vpickve2gr.du \$r4,(\$vr[0-9]+),0
** vpickve2gr.du \$r5,(\$vr[0-9]+),1
** jr \$r1
-*/
+*/
__m128d
foo2_hi (__m256d x)
{
@@ -60,7 +60,7 @@ foo2_hi (__m256d x)
/*
**foo3_lo:
-** vld (\$vr[0-9]+),\$r4,0
+** xvld (\$xr[0-9]+),\$r4,0
** vpickve2gr.du \$r4,(\$vr[0-9]+),0
** vpickve2gr.du \$r5,(\$vr[0-9]+),1
** jr \$r1
--
2.20.1