We used to apply -mrvv-max-lmul= to limit VLS code gen, auto vectorizer, and builtin string function expansion. But I think the VLS code gen part doesn't need this limit, since it only happens when the user explicitly writes vector types.
For example, int32x8_t under -mrvv-max-lmul=m1 with VLEN=128 would be split into two int32x4_t, which generates more instructions and runs slower. In this patch, I changed -mrvv-max-lmul= to only affect auto vectorization and builtin string function expansion. Actually, the option's help text already says it only controls the LMUL used by auto-vectorization, so I believe this change makes sense :) gcc/ChangeLog: * config/riscv/riscv-protos.h (vls_mode_valid_p): New argument allow_up_to_lmul_8. * config/riscv/riscv-v.cc (autovectorize_vector_modes): Set allow_up_to_lmul_8 to false. (vls_mode_valid_p): Add new argument allow_up_to_lmul_8, and use it to determine whether to allow LMUL 8. gcc/testsuite/ChangeLog: * gcc.target.riscv/rvv/vls-type-rvv-max-lmul.c: New test. --- gcc/config/riscv/riscv-protos.h | 2 +- gcc/config/riscv/riscv-v.cc | 31 ++++++++++--------- .../riscv/rvv/vls-type-rvv-max-lmul.c | 12 +++++++ 3 files changed, 29 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls-type-rvv-max-lmul.c diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 539321ff95b..045ee09b23f 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -763,7 +763,7 @@ opt_machine_mode vectorize_related_mode (machine_mode, scalar_mode, unsigned int autovectorize_vector_modes (vec<machine_mode> *, bool); bool cmp_lmul_le_one (machine_mode); bool cmp_lmul_gt_one (machine_mode); -bool vls_mode_valid_p (machine_mode); +bool vls_mode_valid_p (machine_mode, bool allow_up_to_lmul_8 = true); bool vlmax_avl_type_p (rtx_insn *); bool has_vl_op (rtx_insn *); bool tail_agnostic_p (rtx_insn *); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index c9c83282732..3484f6442e7 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2910,7 +2910,7 @@ autovectorize_vector_modes (vector_modes *modes, bool) machine_mode mode; while (size > 0 && get_vector_mode 
(QImode, size).exists (&mode)) { - if (vls_mode_valid_p (mode)) + if (vls_mode_valid_p (mode, /* allow_up_to_lmul_8 */ false)) modes->safe_push (mode); i++; @@ -5027,26 +5027,27 @@ cmp_lmul_gt_one (machine_mode mode) Then we can have the condition for VLS mode in fixed-vlmax, aka: PRECISION (VLSmode) < VLEN / (64 / PRECISION(VLS_inner_mode)). */ bool -vls_mode_valid_p (machine_mode vls_mode) +vls_mode_valid_p (machine_mode vls_mode, bool allow_up_to_lmul_8) { if (!TARGET_VECTOR || TARGET_XTHEADVECTOR) return false; if (rvv_vector_bits == RVV_VECTOR_BITS_SCALABLE) { - if (GET_MODE_CLASS (vls_mode) != MODE_VECTOR_BOOL - && !ordered_p (TARGET_MAX_LMUL * BITS_PER_RISCV_VECTOR, - GET_MODE_PRECISION (vls_mode))) - /* We enable VLS modes which are aligned with TARGET_MAX_LMUL and - BITS_PER_RISCV_VECTOR. - - e.g. When TARGET_MAX_LMUL = 1 and BITS_PER_RISCV_VECTOR = (128,128). - We enable VLS modes have fixed size <= 128bit. Since ordered_p is - false between VLA modes with size = (128, 128) bits and VLS mode - with size = 128 bits, we will end up with multiple ICEs in - middle-end generic codes. */ - return false; - return true; + if (GET_MODE_CLASS (vls_mode) != MODE_VECTOR_BOOL) + return true; + if (allow_up_to_lmul_8) + return true; + /* We enable VLS modes which are aligned with TARGET_MAX_LMUL and + BITS_PER_RISCV_VECTOR. + + e.g. When TARGET_MAX_LMUL = 1 and BITS_PER_RISCV_VECTOR = (128,128). + We enable VLS modes have fixed size <= 128bit. Since ordered_p is + false between VLA modes with size = (128, 128) bits and VLS mode + with size = 128 bits, we will end up with multiple ICEs in + middle-end generic codes. 
*/ + return !ordered_p (TARGET_MAX_LMUL * BITS_PER_RISCV_VECTOR, + GET_MODE_PRECISION (vls_mode)); } if (rvv_vector_bits == RVV_VECTOR_BITS_ZVL) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls-type-rvv-max-lmul.c b/gcc/testsuite/gcc.target/riscv/rvv/vls-type-rvv-max-lmul.c new file mode 100644 index 00000000000..5d52f7798d5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls-type-rvv-max-lmul.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m1 -fdump-tree-optimized" } */ + +typedef long long int64x8_t __attribute__((vector_size(64))); + +int64x8_t foo(int64x8_t a, int64x8_t b) +{ + return a + b; +} +/* Make sure we can use up to LMUL 4 to process int64x8_t at once rather than + break that into 4 LMUL 1 operations. */ +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e64,m4,t[au],m[au]} } } */ -- 2.34.1