[PATCH] LoongArch: Implement vector reduction from 256-bit to 128-bit

Jiahao Xu Wed, 22 Oct 2025 02:08:01 -0700

gcc/ChangeLog:

        * config/loongarch/lasx.md (vec_extract<mode><lasxhalf>): New
        define_expand.
        (vec_extract_lo_<mode>): New define_insn_and_split.
        (vec_extract_hi_<mode>): New define_insn.
        (@vec_extract_lo_<mode>): New define_insn_and_split.
        (@vec_extract_hi_<mode>): New define_insn.
        (vec_extract_lo_v16hi): New define_insn_and_split.
        (vec_extract_hi_v16hi): New define_insn.
        (vec_extract_lo_v32qi): New define_insn_and_split.
        (vec_extract_hi_v32qi): New define_insn.
        * config/loongarch/loongarch.cc (loongarch_split_reduction):
        Implement TARGET_VECTORIZE_SPLIT_REDUCTION.


gcc/testsuite/ChangeLog:

        * gcc.target/loongarch/lasx-reduc-1.c: New test.
---
 gcc/config/loongarch/lasx.md                  | 119 ++++++++++++++++++
 gcc/config/loongarch/loongarch.cc             |  29 +++++
 .../gcc.target/loongarch/lasx-reduc-1.c       |  11 ++
 3 files changed, 159 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 3d71f30a54b..b8f881ddcd6 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -633,6 +633,125 @@ (define_insn_and_split "vec_extract<mode>_0"
   [(set_attr "move_type" "fmove")
    (set_attr "mode" "<UNITMODE>")])
 
+(define_expand "vec_extract<mode><lasxhalf>" ; extract one vector half
+  [(match_operand:<VHMODE256_ALL> 0 "register_operand") ; half-width result
+   (match_operand:LASX 1 "register_operand") ; full-width source
+   (match_operand 2 "const_0_or_1_operand")] ; 0 = low half, 1 = high half
+  "ISA_HAS_LASX"
+{
+  if (INTVAL (operands[2]))
+    emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
+  else
+    emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn_and_split "vec_extract_lo_<mode>" ; low half, LASX_D modes
+  [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+    (vec_select:<VHMODE256_ALL>
+      (match_operand:LASX_D 1 "register_operand" "f")
+      (parallel [(const_int 0) (const_int 1)])))] ; elements 0-1
+  "ISA_HAS_LASX"
+  "#" ; no asm template; must be split
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))] ; plain move of the source lowpart
+  "operands[1] = gen_lowpart (<VHMODE256_ALL>mode, operands[1]);")
+
+(define_insn "vec_extract_hi_<mode>" ; high half, LASX_D modes
+  [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+    (vec_select:<VHMODE256_ALL>
+      (match_operand:LASX_D 1 "register_operand" "f")
+      (parallel [(const_int 2) (const_int 3)])))] ; elements 2-3
+  "ISA_HAS_LASX"
+  "xvpermi.d\t%u0,%u1,0xe" ; 0xe: source dwords 2,3 -> result dwords 0,1
+  [(set_attr "move_type" "fmove")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "@vec_extract_lo_<mode>" ; low half, LASX_W modes
+  [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+    (vec_select:<VHMODE256_ALL>
+      (match_operand:LASX_W 1 "register_operand" "f")
+      (parallel [(const_int 0) (const_int 1)
+             (const_int 2) (const_int 3)])))] ; elements 0-3
+  "ISA_HAS_LASX"
+  "#" ; no asm template; must be split
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))] ; plain move of the source lowpart
+  "operands[1] = gen_lowpart (<VHMODE256_ALL>mode, operands[1]);")
+
+(define_insn "@vec_extract_hi_<mode>" ; high half, LASX_W modes
+  [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+    (vec_select:<VHMODE256_ALL>
+      (match_operand:LASX_W 1 "register_operand" "f")
+      (parallel [(const_int 4) (const_int 5)
+             (const_int 6) (const_int 7)])))] ; elements 4-7
+  "ISA_HAS_LASX"
+  "xvpermi.d\t%u0,%u1,0xe" ; 0xe: source dwords 2,3 -> result dwords 0,1
+  [(set_attr "move_type" "fmove")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "vec_extract_lo_v16hi" ; V16HI elements 0-7 as V8HI
+  [(set (match_operand:V8HI 0 "register_operand" "=f")
+    (vec_select:V8HI
+      (match_operand:V16HI 1 "register_operand" "f")
+      (parallel [(const_int 0) (const_int 1)
+             (const_int 2) (const_int 3)
+             (const_int 4) (const_int 5)
+             (const_int 6) (const_int 7)])))]
+  "ISA_HAS_LASX"
+  "#" ; no asm template; must be split
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))] ; plain move of the source lowpart
+  "operands[1] = gen_lowpart (V8HImode, operands[1]);")
+
+(define_insn "vec_extract_hi_v16hi" ; V16HI elements 8-15 as V8HI
+  [(set (match_operand:V8HI 0 "register_operand" "=f")
+    (vec_select:V8HI
+      (match_operand:V16HI 1 "register_operand" "f")
+      (parallel [(const_int 8) (const_int 9)
+             (const_int 10) (const_int 11)
+             (const_int 12) (const_int 13)
+             (const_int 14) (const_int 15)])))]
+  "ISA_HAS_LASX"
+  "xvpermi.d\t%u0,%u1,0xe" ; 0xe: source dwords 2,3 -> result dwords 0,1
+  [(set_attr "move_type" "fmove")
+   (set_attr "mode" "V16HI")])
+
+(define_insn_and_split "vec_extract_lo_v32qi" ; V32QI elements 0-15 as V16QI
+  [(set (match_operand:V16QI 0 "register_operand" "=f")
+    (vec_select:V16QI
+      (match_operand:V32QI 1 "register_operand" "f")
+      (parallel [(const_int 0) (const_int 1)
+             (const_int 2) (const_int 3)
+             (const_int 4) (const_int 5)
+             (const_int 6) (const_int 7)
+             (const_int 8) (const_int 9)
+             (const_int 10) (const_int 11)
+             (const_int 12) (const_int 13)
+             (const_int 14) (const_int 15)])))]
+  "ISA_HAS_LASX"
+  "#" ; no asm template; must be split
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))] ; plain move of the source lowpart
+  "operands[1] = gen_lowpart (V16QImode, operands[1]);")
+
+(define_insn "vec_extract_hi_v32qi" ; V32QI elements 16-31 as V16QI
+  [(set (match_operand:V16QI 0 "register_operand" "=f")
+    (vec_select:V16QI
+      (match_operand:V32QI 1 "register_operand" "f")
+      (parallel [(const_int 16) (const_int 17)
+             (const_int 18) (const_int 19)
+             (const_int 20) (const_int 21)
+             (const_int 22) (const_int 23)
+             (const_int 24) (const_int 25)
+             (const_int 26) (const_int 27)
+             (const_int 28) (const_int 29)
+             (const_int 30) (const_int 31)])))]
+  "ISA_HAS_LASX"
+  "xvpermi.d\t%u0,%u1,0xe" ; 0xe: source dwords 2,3 -> result dwords 0,1
+  [(set_attr "move_type" "fmove")
+   (set_attr "mode" "V32QI")])
+
 (define_expand "vec_perm<mode>"
  [(match_operand:LASX 0 "register_operand")
   (match_operand:LASX 1 "register_operand")
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 3fe8c766cc7..7bc856b6c60 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4143,6 +4143,31 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
     }
 }
 
+/* Implement TARGET_VECTORIZE_SPLIT_REDUCTION: reduce 256-bit LASX modes via
+   128-bit LSX halves to avoid cross-lane ops; other modes are unchanged.  */
+
+static machine_mode
+loongarch_split_reduction (machine_mode mode)
+{
+  switch (mode)
+    {
+    case E_V4DImode:
+      return V2DImode;
+    case E_V8SImode:
+      return V4SImode;
+    case E_V16HImode:
+      return V8HImode;
+    case E_V32QImode:
+      return V16QImode;
+    case E_V8SFmode:
+      return V4SFmode;
+    case E_V4DFmode:
+      return V2DFmode;
+    default:
+      return mode;
+    }
+}
+
 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
 
 static int
@@ -11397,6 +11422,10 @@ loongarch_can_inline_p (tree caller, tree callee)
 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
   loongarch_autovectorize_vector_modes
 
+#undef TARGET_VECTORIZE_SPLIT_REDUCTION
+#define TARGET_VECTORIZE_SPLIT_REDUCTION \
+  loongarch_split_reduction
+
 #undef TARGET_OPTAB_SUPPORTED_P
 #define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p
 
diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c b/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c
new file mode 100644
index 00000000000..eb3933b7079
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "\\.REDUC_PLUS" 1 "optimized" } } */
+
+int sumint(const int arr[]) {  /* Sums 1040 ints; expect one .REDUC_PLUS.  */
+    arr = __builtin_assume_aligned (arr, 64);
+    int sum = 0;
+    for (int i = 0 ; i < 1040; i++)
+      sum += arr[i];
+    return sum;
+}
-- 
2.20.1

[PATCH] LoongArch: Implement vector reduction from 256-bit to 128-bit

Reply via email to