[gcc r16-5266] LoongArch: optimize half of vector copy for V4DFmode.

LuluCheng via Gcc-cvs Thu, 13 Nov 2025 17:16:28 -0800

https://gcc.gnu.org/g:0d2fed9a0461eefc6ae66df0390be763ad3e6c8e


commit r16-5266-g0d2fed9a0461eefc6ae66df0390be763ad3e6c8e
Author: zhaozhou <[email protected]>
Date:   Mon Nov 10 15:04:01 2025 +0800

    LoongArch: optimize half of vector copy for V4DFmode.
    
    Replace xvpermi with xvbsrl when copying the high 64 bits of a V4DFmode
    vector to the low 64 bits, which saves 2 insn delays.
    
    gcc/ChangeLog:
    
            * config/loongarch/lasx.md (lasx_xvbsrl_d_f): New template.
            * config/loongarch/loongarch.cc (emit_reduc_half): Replace insn.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/loongarch/vec_reduc_half.c: New test.

Diff:
---
 gcc/config/loongarch/lasx.md                        | 10 ++++++++++
 gcc/config/loongarch/loongarch.cc                   |  2 +-
 gcc/testsuite/gcc.target/loongarch/vec_reduc_half.c | 10 ++++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 71dd25d0b5a2..e0af1e4c5909 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -2702,6 +2702,16 @@
   [(set_attr "type" "simd_shift")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "lasx_xvbsrl_d_f"
+  [(set (match_operand:V4DF 0 "register_operand" "=f")
+       (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")
+                     (match_operand 2 "const_uimm5_operand" "")]
+                     UNSPEC_LASX_XVBSRL_V))]
+  "ISA_HAS_LASX"
+  "xvbsrl.v\t%u0,%u1,%2"
+  [(set_attr "type" "simd_shift")
+   (set_attr "mode" "V4DF")])
+
 (define_insn "lasx_xvbsll_<lasxfmt>"
   [(set (match_operand:ILASX 0 "register_operand" "=f")
        (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index e7c291f30563..fcca0ec8252b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -10092,7 +10092,7 @@ emit_reduc_half (rtx dest, rtx src, int i)
       if (i == 256)
        tem = gen_lasx_xvpermi_d_v4df (dest, src, GEN_INT (0xe));
       else
-       tem = gen_lasx_xvpermi_d_v4df (dest, src, const1_rtx);
+       tem = gen_lasx_xvbsrl_d_f (dest, src, GEN_INT (0x8));
       break;
     case E_V32QImode:
     case E_V16HImode:
diff --git a/gcc/testsuite/gcc.target/loongarch/vec_reduc_half.c b/gcc/testsuite/gcc.target/loongarch/vec_reduc_half.c
new file mode 100644
index 000000000000..39e817bddaff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vec_reduc_half.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -mlasx" } */
+
+double
+foo_1 (double *a, double *b)
+{
+  return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
+}
+
+/* { dg-final { scan-assembler-times "xvpermi.d" 1} } */

[gcc r16-5266] LoongArch: optimize half of vector copy for V4DFmode.

Reply via email to