Re: [pushed][PATCH v2] LoongArch: Implement vector reduction from 256-bit to 128-bit

Lulu Cheng Sat, 25 Oct 2025 00:53:29 -0700

Pushed to r16-4619.

在 2025/10/23 下午2:29, Jiahao Xu 写道:

gcc/ChangeLog:


        * config/loongarch/lasx.md (vec_extract<mode><lasxhalf>): New
        define_expand.
        (vec_extract_lo_<mode>): New define_insn_and_split.
        (vec_extract_hi_<mode>): New define_insn.
        * config/loongarch/loongarch-protos.h
        (loongarch_check_vect_par_cnst_half): New function prototype.
        * config/loongarch/loongarch.cc (loongarch_split_reduction):
        Implement TARGET_VECTORIZE_SPLIT_REDUCTION.
        (loongarch_check_vect_par_cnst_half): New function.
        * config/loongarch/predicates.md
        (vect_par_cnst_low_half): New predicate.
        (vect_par_cnst_high_half): New predicate.

gcc/testsuite/ChangeLog:

        * gcc.target/loongarch/lasx-reduc-1.c: New test.
---
  gcc/config/loongarch/lasx.md                  | 42 ++++++++++++++++
  gcc/config/loongarch/loongarch-protos.h       |  1 +
  gcc/config/loongarch/loongarch.cc             | 48 +++++++++++++++++++
  gcc/config/loongarch/predicates.md            | 16 +++++++
  .../gcc.target/loongarch/lasx-reduc-1.c       | 17 +++++++
  5 files changed, 124 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 3d71f30a54b..eed4d2b186b 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -633,6 +633,48 @@ (define_insn_and_split "vec_extract<mode>_0"
    [(set_attr "move_type" "fmove")
     (set_attr "mode" "<UNITMODE>")])

+(define_expand "vec_extract<mode><lasxhalf>"
+  [(match_operand:<VHMODE256_ALL> 0 "register_operand")
+   (match_operand:LASX 1 "register_operand")
+   (match_operand 2 "const_0_or_1_operand")]
+  "ISA_HAS_LASX"
+{
+  if (INTVAL (operands[2]))
+    {
+     operands[2] = loongarch_lsx_vec_parallel_const_half (<MODE>mode, true);
+     emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1],
+                 operands[2]));
+    }
+  else
+    {
+     operands[2] = loongarch_lsx_vec_parallel_const_half (<MODE>mode, false);
+     emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1],
+                 operands[2]));
+    }
+  DONE;
+})
+
+(define_insn_and_split "vec_extract_lo_<mode>"
+  [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+    (vec_select:<VHMODE256_ALL>
+      (match_operand:LASX 1 "register_operand" "f")
+      (match_operand:LASX 2 "vect_par_cnst_low_half")))]
+  "ISA_HAS_LASX"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+  "operands[1] = gen_lowpart (<VHMODE256_ALL>mode, operands[1]);")
+
+(define_insn "vec_extract_hi_<mode>"
+  [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+    (vec_select:<VHMODE256_ALL>
+      (match_operand:LASX 1 "register_operand" "f")
+      (match_operand:LASX 2 "vect_par_cnst_high_half")))]
+  "ISA_HAS_LASX"
+  "xvpermi.d\t%u0,%u1,0xe"
+  [(set_attr "move_type" "fmove")
+   (set_attr "mode" "<MODE>")])
+
  (define_expand "vec_perm<mode>"
   [(match_operand:LASX 0 "register_operand")
    (match_operand:LASX 1 "register_operand")
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index 6139af48d7a..6ecbe27218c 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -121,6 +121,7 @@ extern bool loongarch_const_vector_same_int_p (rtx, machine_mode,
  extern bool loongarch_const_vector_shuffle_set_p (rtx, machine_mode);
  extern bool loongarch_const_vector_bitimm_set_p (rtx, machine_mode);
  extern bool loongarch_const_vector_bitimm_clr_p (rtx, machine_mode);
+extern bool loongarch_check_vect_par_cnst_half (rtx, machine_mode, bool);
  extern rtx loongarch_const_vector_vrepli (rtx, machine_mode);
  extern rtx loongarch_lsx_vec_parallel_const_half (machine_mode, bool);
  extern rtx loongarch_gen_const_int_vector (machine_mode, HOST_WIDE_INT);
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 3fe8c766cc7..c782cac0ff9 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1846,6 +1846,37 @@ loongarch_const_vector_shuffle_set_p (rtx op, machine_mode mode)
    return true;
  }

+/* Check if OP is a PARALLEL RTX with CONST_INT elements representing
+   the HIGH (high_p == TRUE) or LOW (high_p == FALSE) half of a vector
+   for mode MODE. Returns true if the pattern matches, false otherwise.  */
+
+bool
+loongarch_check_vect_par_cnst_half (rtx op, machine_mode mode, bool high_p)
+{
+  int nunits = XVECLEN (op, 0);
+  int nelts = GET_MODE_NUNITS (mode);
+
+  if (!known_eq (nelts, nunits * 2))
+    return false;
+
+  rtx first = XVECEXP (op, 0, 0);
+  if (!CONST_INT_P (first))
+    return false;
+
+  int base = high_p ? nelts / 2 : 0;
+  if (INTVAL (first) != base)
+    return false;
+
+  for (int i = 1; i < nunits; i++)
+    {
+      rtx elem = XVECEXP (op, 0, i);
+      if (!CONST_INT_P (elem) || INTVAL (elem) != INTVAL (first) + i)
+       return false;
+    }
+
+  return true;
+}
+
  rtx
  loongarch_const_vector_vrepli (rtx x, machine_mode mode)
  {
@@ -4143,6 +4174,19 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
      }
  }

+/* All CPUs prefer to avoid cross-lane operations so perform reductions
+   upper against lower halves up to LSX reg size.  */
+
+machine_mode
+loongarch_split_reduction (machine_mode mode)
+{
+  if (LSX_SUPPORTED_MODE_P (mode))
+    return mode;
+
+  return mode_for_vector (as_a <scalar_mode> (GET_MODE_INNER (mode)),
+                         GET_MODE_NUNITS (mode) / 2).require ();
+}
+
  /* Implement targetm.vectorize.builtin_vectorization_cost.  */

  static int
@@ -11397,6 +11441,10 @@ loongarch_can_inline_p (tree caller, tree callee)
  #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
    loongarch_autovectorize_vector_modes

+#undef TARGET_VECTORIZE_SPLIT_REDUCTION
+#define TARGET_VECTORIZE_SPLIT_REDUCTION \
+  loongarch_split_reduction
+
  #undef TARGET_OPTAB_SUPPORTED_P
  #define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p

diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
index fd2d7b9ab55..34cf74d5d66 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -699,3 +699,19 @@ (define_special_predicate "vect_par_cnst_even_or_odd_half"

    return true;
  })
+
+;; PARALLEL for a vec_select that selects the low half
+;; elements of a vector of MODE.
+(define_special_predicate "vect_par_cnst_low_half"
+  (match_code "parallel")
+{
+  return loongarch_check_vect_par_cnst_half (op, mode, false);
+})
+
+;; PARALLEL for a vec_select that selects the high half
+;; elements of a vector of MODE.
+(define_special_predicate "vect_par_cnst_high_half"
+  (match_code "parallel")
+{
+  return loongarch_check_vect_par_cnst_half (op, mode, true);;
+})
diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c b/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c
new file mode 100644
index 00000000000..e4492593aa9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funsafe-math-optimizations -mlasx -fno-unroll-loops -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_PLUS" 4 "optimized" } } */
+
+#define DEFINE_SUM_FUNCTION(T, FUNC_NAME, SIZE) \
+T FUNC_NAME(const T arr[]) { \
+    arr = __builtin_assume_aligned(arr, 64); \
+    T sum = 0; \
+    for (int i = 0; i < SIZE; i++) \
+        sum += arr[i]; \
+    return sum; \
+}
+
+DEFINE_SUM_FUNCTION (int, sum_int_1040, 1028)
+DEFINE_SUM_FUNCTION (float, sum_float_1040, 1028)
+DEFINE_SUM_FUNCTION (long, sum_long_1040, 1026)
+DEFINE_SUM_FUNCTION (double, sum_double_1040, 1026)

Re: [pushed][PATCH v2] LoongArch: Implement vector reduction from 256-bit to 128-bit

Reply via email to