Hi Kito,
On 02/09/2025 05:17, Kito Cheng wrote:
Hi Paul-Antoine:
diff --git gcc/config/riscv/autovec-opt.md gcc/config/riscv/autovec-opt.md
index d56fb5f237c..5cb34cef418 100644
--- gcc/config/riscv/autovec-opt.md
+++ gcc/config/riscv/autovec-opt.md
@@ -2163,3 +2163,41 @@ (define_insn_and_split "*vfmax_vf_<mode>"
}
[(set_attr "type" "vfminmax")]
)
+
+(define_insn_and_split "*vfmax_vf_ieee_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (unspec:V_VLSF [
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSF 1 "register_operand")
+ ] UNSPEC_VFMAX))]
+ "TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()"
Could you try to use an iterator to merge this pattern with the vfmin_vf one?
You can grep for UNSPEC_VFMAXMIN to see an example in vector.md.
I have attached an amended patch as you suggested. Here is the relevant
snippet:
;; Vector-scalar IEEE fmax/fmin, variant with the broadcast scalar FIRST.
;; Matches an UNSPEC_VFMAXMIN unspec whose first element is scalar operand 2
;; wrapped in vec_duplicate and whose second element is vector operand 1; the
;; result goes to vector operand 0.
;; The insn is only available when signaling NaNs need not be honored for the
;; mode and while new pseudos can still be created.
;; The "#" output template means no assembly is printed for this insn itself:
;; it is always split (split condition "&& 1") and replaced by the insn whose
;; code is returned by code_for_pred_scalar (<IEEE_FMAXMIN_OP>, <MODE>mode),
;; emitted through riscv_vector::emit_vlmax_insn as a BINARY_OP.
(define_insn_and_split "*v<ieee_fmaxmin_op>_vf_<mode>"
[(set (match_operand:V_VLSF 0 "register_operand")
(unspec:V_VLSF [
(vec_duplicate:V_VLSF
(match_operand:<VEL> 2 "register_operand"))
(match_operand:V_VLSF 1 "register_operand")
] UNSPEC_VFMAXMIN))]
"TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<IEEE_FMAXMIN_OP>,
<MODE>mode),
riscv_vector::BINARY_OP, operands);
DONE;
}
[(set_attr "type" "vfminmax")]
)
;; Vector-scalar IEEE fmax/fmin, variant with the broadcast scalar SECOND.
;; Matches an UNSPEC_VFMAXMIN unspec whose first element is vector operand 1
;; and whose second element is scalar operand 2 wrapped in vec_duplicate; the
;; result goes to vector operand 0.
;; The insn is only available when signaling NaNs need not be honored for the
;; mode and while new pseudos can still be created.
;; The "#" output template means no assembly is printed for this insn itself:
;; it is always split (split condition "&& 1") and replaced by the insn whose
;; code is returned by code_for_pred_scalar (<IEEE_FMAXMIN_OP>, <MODE>mode),
;; emitted through riscv_vector::emit_vlmax_insn as a BINARY_OP.
(define_insn_and_split "*v<ieee_fmaxmin_op>_vf_<mode>"
[(set (match_operand:V_VLSF 0 "register_operand")
(unspec:V_VLSF [
(match_operand:V_VLSF 1 "register_operand")
(vec_duplicate:V_VLSF
(match_operand:<VEL> 2 "register_operand"))
] UNSPEC_VFMAXMIN))]
"TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<IEEE_FMAXMIN_OP>,
<MODE>mode),
riscv_vector::BINARY_OP, operands);
DONE;
}
[(set_attr "type" "vfminmax")]
)
--
PA
commit 959f474db53c113c4356fa09ddc62fe9429c896f
Author: Paul-Antoine Arras <par...@baylibre.com>
Date: Mon Sep 1 15:54:26 2025 +0200
RISC-V: Add patterns for vector-scalar IEEE floating-point max
These patterns enable the combine pass (or late-combine, depending on the case)
to merge a vec_duplicate into an UNSPEC_VFMAX unspec RTL instruction.
Before this patch, we have two instructions, e.g.:
vfmv.v.f v2,fa0
vfmax.vv v1,v2,v1
After, we get only one:
vfmax.vf v1,v1,fa0
In some cases, it also shaves off one vsetvli.
gcc/ChangeLog:
* config/riscv/autovec-opt.md (*vfmin_vf_ieee_<mode>): Rename into...
(*v<ieee_fmaxmin_op>_vf_<mode>): ...this.  Extend to also combine
vec_duplicate + vfmax.vv (unspec) into vfmax.vf.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f16.c: Add vfmax.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f64.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f16.c: Add vfmax. Also add
missing -fno-fast-math.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f64.c: Likewise.
diff --git gcc/config/riscv/autovec-opt.md gcc/config/riscv/autovec-opt.md
index d56fb5f237c..f2afdfef166 100644
--- gcc/config/riscv/autovec-opt.md
+++ gcc/config/riscv/autovec-opt.md
@@ -2088,7 +2088,7 @@ (define_insn_and_split "*vfrdiv_vf_<mode>"
[(set_attr "type" "vfdiv")]
)
-;; vfmin.vf
+;; vfmin.vf, vfmax.vf
(define_insn_and_split "*vfmin_vf_<mode>"
[(set (match_operand:V_VLSF 0 "register_operand")
(smin:V_VLSF
@@ -2107,57 +2107,58 @@ (define_insn_and_split "*vfmin_vf_<mode>"
[(set_attr "type" "vfminmax")]
)
-(define_insn_and_split "*vfmin_vf_ieee_<mode>"
+(define_insn_and_split "*vfmax_vf_<mode>"
[(set (match_operand:V_VLSF 0 "register_operand")
- (unspec:V_VLSF [
+ (smax:V_VLSF
(vec_duplicate:V_VLSF
(match_operand:<VEL> 2 "register_operand"))
- (match_operand:V_VLSF 1 "register_operand")
- ] UNSPEC_VFMIN))]
- "TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()"
+ (match_operand:V_VLSF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
- riscv_vector::emit_vlmax_insn (code_for_pred_scalar (UNSPEC_VFMIN, <MODE>mode),
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (SMAX, <MODE>mode),
riscv_vector::BINARY_OP, operands);
DONE;
}
[(set_attr "type" "vfminmax")]
)
-(define_insn_and_split "*vfmin_vf_ieee_<mode>"
+(define_insn_and_split "*v<ieee_fmaxmin_op>_vf_<mode>"
[(set (match_operand:V_VLSF 0 "register_operand")
(unspec:V_VLSF [
- (match_operand:V_VLSF 1 "register_operand")
(vec_duplicate:V_VLSF
(match_operand:<VEL> 2 "register_operand"))
- ] UNSPEC_VFMIN))]
+ (match_operand:V_VLSF 1 "register_operand")
+ ] UNSPEC_VFMAXMIN))]
"TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
- riscv_vector::emit_vlmax_insn (code_for_pred_scalar (UNSPEC_VFMIN, <MODE>mode),
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<IEEE_FMAXMIN_OP>,
+ <MODE>mode),
riscv_vector::BINARY_OP, operands);
DONE;
}
[(set_attr "type" "vfminmax")]
)
-;; vfmax.vf
-(define_insn_and_split "*vfmax_vf_<mode>"
+(define_insn_and_split "*v<ieee_fmaxmin_op>_vf_<mode>"
[(set (match_operand:V_VLSF 0 "register_operand")
- (smax:V_VLSF
+ (unspec:V_VLSF [
+ (match_operand:V_VLSF 1 "register_operand")
(vec_duplicate:V_VLSF
(match_operand:<VEL> 2 "register_operand"))
- (match_operand:V_VLSF 1 "register_operand")))]
- "TARGET_VECTOR && can_create_pseudo_p ()"
+ ] UNSPEC_VFMAXMIN))]
+ "TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
- riscv_vector::emit_vlmax_insn (code_for_pred_scalar (SMAX, <MODE>mode),
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<IEEE_FMAXMIN_OP>,
+ <MODE>mode),
riscv_vector::BINARY_OP, operands);
DONE;
}
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f16.c
index 1a20ee78536..ba8eec0bb7c 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f16.c
@@ -4,5 +4,7 @@
#include "vf_binop.h"
DEF_VF_BINOP_CASE_2_WRAP (_Float16, __builtin_fminf16, min)
+DEF_VF_BINOP_CASE_2_WRAP (_Float16, __builtin_fmaxf16, max)
/* { dg-final { scan-assembler-times {vfmin.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfmax.vf} 1 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f32.c
index 1e0f7f5cb15..b5a1991ab89 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f32.c
@@ -4,5 +4,7 @@
#include "vf_binop.h"
DEF_VF_BINOP_CASE_2_WRAP (float, __builtin_fminf, min)
+DEF_VF_BINOP_CASE_2_WRAP (float, __builtin_fmaxf, max)
/* { dg-final { scan-assembler-times {vfmin.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfmax.vf} 1 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f64.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f64.c
index 61db2df4521..a6fc781d87a 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f64.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-5-f64.c
@@ -4,5 +4,7 @@
#include "vf_binop.h"
DEF_VF_BINOP_CASE_2_WRAP (double, __builtin_fmin, min)
+DEF_VF_BINOP_CASE_2_WRAP (double, __builtin_fmax, max)
/* { dg-final { scan-assembler-times {vfmin.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfmax.vf} 1 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f16.c
index 392580abddc..afd64e78072 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f16.c
@@ -4,3 +4,4 @@
#include "vf-5-f16.c"
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
+/* { dg-final { scan-assembler-not {vfmax.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f32.c
index 9dbd226c042..edcecffdf06 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f32.c
@@ -4,3 +4,4 @@
#include "vf-5-f32.c"
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
+/* { dg-final { scan-assembler-not {vfmax.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f64.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f64.c
index 44a17cd1ff6..fafaa254cd9 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f64.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-6-f64.c
@@ -4,3 +4,4 @@
#include "vf-5-f64.c"
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
+/* { dg-final { scan-assembler-not {vfmax.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f16.c
index 0883c882201..362fdfa69c0 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f16.c
@@ -3,6 +3,10 @@
#include "vf_binop.h"
-DEF_VF_BINOP_CASE_3_WRAP (_Float16, __builtin_fminf16, min, VF_BINOP_FUNC_BODY_X128)
+DEF_VF_BINOP_CASE_3_WRAP (_Float16, __builtin_fminf16, min,
+ VF_BINOP_FUNC_BODY_X128)
+DEF_VF_BINOP_CASE_3_WRAP (_Float16, __builtin_fmaxf16, max,
+ VF_BINOP_FUNC_BODY_X128)
/* { dg-final { scan-assembler {vfmin.vf} } } */
+/* { dg-final { scan-assembler {vfmax.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f32.c
index 85282404ad2..2944a35451d 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f32.c
@@ -4,5 +4,7 @@
#include "vf_binop.h"
DEF_VF_BINOP_CASE_3_WRAP (float, __builtin_fminf, min, VF_BINOP_FUNC_BODY_X128)
+DEF_VF_BINOP_CASE_3_WRAP (float, __builtin_fmaxf, max, VF_BINOP_FUNC_BODY_X128)
/* { dg-final { scan-assembler {vfmin.vf} } } */
+/* { dg-final { scan-assembler {vfmax.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f64.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f64.c
index 474b33900ce..f785f855820 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f64.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-7-f64.c
@@ -4,5 +4,7 @@
#include "vf_binop.h"
DEF_VF_BINOP_CASE_3_WRAP (double, __builtin_fmin, min, VF_BINOP_FUNC_BODY_X128)
+DEF_VF_BINOP_CASE_3_WRAP (double, __builtin_fmax, max, VF_BINOP_FUNC_BODY_X128)
/* { dg-final { scan-assembler {vfmin.vf} } } */
+/* { dg-final { scan-assembler {vfmax.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f16.c
index bd68b3cdf22..5c91c88a920 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f16.c
@@ -1,6 +1,7 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d --param=fpr2vr-cost=4" } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -fno-fast-math --param=fpr2vr-cost=4" } */
#include "vf-7-f16.c"
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
+/* { dg-final { scan-assembler-not {vfmax.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f32.c
index 000402c1520..13237f12897 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f32.c
@@ -1,6 +1,7 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=4" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -fno-fast-math --param=fpr2vr-cost=4" } */
#include "vf-7-f32.c"
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
+/* { dg-final { scan-assembler-not {vfmax.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f64.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f64.c
index 89dec81fba4..109913c4290 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f64.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-8-f64.c
@@ -1,6 +1,7 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=4" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -fno-fast-math --param=fpr2vr-cost=4" } */
#include "vf-7-f64.c"
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
+/* { dg-final { scan-assembler-not {vfmax.vf} } } */