gcc/ChangeLog:
* config/loongarch/lasx.md (vec_extract<mode><lasxhalf>): New
define_expand.
(vec_extract_lo_<mode>): New define_insn_and_split.
(vec_extract_hi_<mode>): New define_insn.
* config/loongarch/loongarch-protos.h
(loongarch_check_vect_par_cnst_half):
New function prototype.
* config/loongarch/loongarch.cc (loongarch_split_reduction):
Implement TARGET_VECTORIZE_SPLIT_REDUCTION.
(loongarch_check_vect_par_cnst_half): New function.
* config/loongarch/predicates.md
(vect_par_cnst_low_half): New predicate.
(vect_par_cnst_high_half): New predicate.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/lasx-reduc-1.c: New test.
---
gcc/config/loongarch/lasx.md | 42 ++++++++++++++++
gcc/config/loongarch/loongarch-protos.h | 1 +
gcc/config/loongarch/loongarch.cc | 48 +++++++++++++++++++
gcc/config/loongarch/predicates.md | 16 +++++++
.../gcc.target/loongarch/lasx-reduc-1.c | 17 +++++++
5 files changed, 124 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 3d71f30a54b..eed4d2b186b 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -633,6 +633,48 @@ (define_insn_and_split "vec_extract<mode>_0"
[(set_attr "move_type" "fmove")
(set_attr "mode" "<UNITMODE>")])
+(define_expand "vec_extract<mode><lasxhalf>" ; extract one half of a LASX vector
+ [(match_operand:<VHMODE256_ALL> 0 "register_operand") ; destination
+ (match_operand:LASX 1 "register_operand") ; source vector
+ (match_operand 2 "const_0_or_1_operand")] ; half selector, tested below
+ "ISA_HAS_LASX"
+{
+ if (INTVAL (operands[2])) /* Nonzero selector: emit the high-half extract.  */
+ {
+ operands[2] = loongarch_lsx_vec_parallel_const_half (<MODE>mode, true);
+ emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1],
+ operands[2]));
+ }
+ else /* Zero selector: emit the low-half extract.  */
+ {
+ operands[2] = loongarch_lsx_vec_parallel_const_half (<MODE>mode, false);
+ emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1],
+ operands[2]));
+ }
+ DONE; /* Everything was emitted above; the expander emits nothing more.  */
+})
+
+(define_insn_and_split "vec_extract_lo_<mode>" ; low half of a LASX vector
+ [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+ (vec_select:<VHMODE256_ALL>
+ (match_operand:LASX 1 "register_operand" "f")
+ (match_operand:LASX 2 "vect_par_cnst_low_half")))] ; selector must be the low-half PARALLEL
+ "ISA_HAS_LASX"
+ "#" ; no assembly template: the insn is always split
+ "&& reload_completed" ; split only once reload has completed
+ [(set (match_dup 0) (match_dup 1))] ; replacement is a plain set of operand 0 from operand 1
+ "operands[1] = gen_lowpart (<VHMODE256_ALL>mode, operands[1]);") ; re-view the source in the destination mode
+
+(define_insn "vec_extract_hi_<mode>" ; high half of a LASX vector
+ [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+ (vec_select:<VHMODE256_ALL>
+ (match_operand:LASX 1 "register_operand" "f")
+ (match_operand:LASX 2 "vect_par_cnst_high_half")))] ; selector must be the high-half PARALLEL
+ "ISA_HAS_LASX"
+ "xvpermi.d\t%u0,%u1,0xe" ; NOTE(review): 0xe presumably moves 64-bit elements 2,3 into 0,1 -- confirm against the LASX manual
+ [(set_attr "move_type" "fmove")
+ (set_attr "mode" "<MODE>")])
+
(define_expand "vec_perm<mode>"
[(match_operand:LASX 0 "register_operand")
(match_operand:LASX 1 "register_operand")
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index 6139af48d7a..6ecbe27218c 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -121,6 +121,7 @@ extern bool loongarch_const_vector_same_int_p (rtx, machine_mode,
extern bool loongarch_const_vector_shuffle_set_p (rtx, machine_mode);
extern bool loongarch_const_vector_bitimm_set_p (rtx, machine_mode);
extern bool loongarch_const_vector_bitimm_clr_p (rtx, machine_mode);
+extern bool loongarch_check_vect_par_cnst_half (rtx, machine_mode, bool);
extern rtx loongarch_const_vector_vrepli (rtx, machine_mode);
extern rtx loongarch_lsx_vec_parallel_const_half (machine_mode, bool);
extern rtx loongarch_gen_const_int_vector (machine_mode, HOST_WIDE_INT);
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 3fe8c766cc7..c782cac0ff9 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1846,6 +1846,37 @@ loongarch_const_vector_shuffle_set_p (rtx op, machine_mode mode)
return true;
}
+/* Return true if OP, a PARALLEL, holds exactly half as many elements as
+   MODE has units and they are the consecutive CONST_INT indices of the
+   high (HIGH_P) or low (!HIGH_P) half of MODE; return false otherwise.  */
+
+bool
+loongarch_check_vect_par_cnst_half (rtx op, machine_mode mode, bool high_p)
+{
+  int total = GET_MODE_NUNITS (mode);
+  int half = XVECLEN (op, 0);
+
+  /* The selector must cover exactly one half of the vector.  */
+  if (!known_eq (total, half * 2))
+    return false;
+
+  /* Element 0 fixes the starting index: 0 for the low half,
+     TOTAL / 2 for the high half.  */
+  rtx head = XVECEXP (op, 0, 0);
+  int start = high_p ? total / 2 : 0;
+  if (!CONST_INT_P (head) || INTVAL (head) != start)
+    return false;
+
+  /* Every later element must continue the ascending run.  */
+  for (int pos = 1; pos < half; pos++)
+    {
+      rtx cur = XVECEXP (op, 0, pos);
+      if (!CONST_INT_P (cur) || INTVAL (cur) != start + pos)
+        return false;
+    }
+  return true;
+}
+
rtx
loongarch_const_vector_vrepli (rtx x, machine_mode mode)
{
@@ -4143,6 +4174,19 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
}
}
+/* Implement TARGET_VECTORIZE_SPLIT_REDUCTION.  Avoid cross-lane operations:
+   reduce upper against lower halves until MODE is an LSX-supported mode.  */
+
+static machine_mode
+loongarch_split_reduction (machine_mode mode)
+{
+  if (LSX_SUPPORTED_MODE_P (mode)) /* Already LSX-supported: keep MODE.  */
+    return mode;
+
+  return mode_for_vector (as_a <scalar_mode> (GET_MODE_INNER (mode)),
+                          GET_MODE_NUNITS (mode) / 2).require ();
+}
+
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
@@ -11397,6 +11441,10 @@ loongarch_can_inline_p (tree caller, tree callee)
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
loongarch_autovectorize_vector_modes
+#undef TARGET_VECTORIZE_SPLIT_REDUCTION
+#define TARGET_VECTORIZE_SPLIT_REDUCTION \
+ loongarch_split_reduction
+
#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
index fd2d7b9ab55..34cf74d5d66 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -699,3 +699,19 @@ (define_special_predicate "vect_par_cnst_even_or_odd_half"
return true;
})
+
+;; Match the PARALLEL operand of a vec_select that picks the low half
+;; of a vector of MODE; see loongarch_check_vect_par_cnst_half.
+(define_special_predicate "vect_par_cnst_low_half"
+ (match_code "parallel")
+{
+ return loongarch_check_vect_par_cnst_half (op, mode, false);
+})
+
+;; Match the PARALLEL operand of a vec_select that picks the high half
+;; of a vector of MODE; see loongarch_check_vect_par_cnst_half.
+(define_special_predicate "vect_par_cnst_high_half"
+ (match_code "parallel")
+{
+ return loongarch_check_vect_par_cnst_half (op, mode, true);
+})
diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c b/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c
new file mode 100644
index 00000000000..e4492593aa9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funsafe-math-optimizations -mlasx -fno-unroll-loops -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_PLUS" 4 "optimized" } } */
+
+#define DEFINE_SUM_FUNCTION(T, FUNC_NAME, SIZE) \
+T FUNC_NAME(const T arr[]) { \
+ arr = __builtin_assume_aligned(arr, 64); \
+ T sum = 0; \
+ for (int i = 0; i < SIZE; i++) \
+ sum += arr[i]; \
+ return sum; \
+}
+
+DEFINE_SUM_FUNCTION (int, sum_int_1040, 1028)
+DEFINE_SUM_FUNCTION (float, sum_float_1040, 1028)
+DEFINE_SUM_FUNCTION (long, sum_long_1040, 1026)
+DEFINE_SUM_FUNCTION (double, sum_double_1040, 1026)