The AArch64 FEAT_FAMINMAX extension is optional from Armv9.2-a and mandatory from Armv9.5-a. It introduces instructions for computing the floating point absolute maximum and minimum of the two vectors element-wise.
This patch introduces AdvSIMD faminmax intrinsics. The intrinsics of this extension are implemented as the following builtin functions: * vamax_f16 * vamaxq_f16 * vamax_f32 * vamaxq_f32 * vamaxq_f64 * vamin_f16 * vaminq_f16 * vamin_f32 * vaminq_f32 * vaminq_f64 gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (enum aarch64_builtins): New enum values for faminmax builtins. (aarch64_init_faminmax_builtins): New function to declare new builtins. (handle_arm_neon_h): Modify to call aarch64_init_faminmax_builtins. (aarch64_general_check_builtin_call): Modify to check whether +faminmax flag is being used and printing error message if not being used. (aarch64_expand_builtin_faminmax): New function to emit instructions of this extension. (aarch64_general_expand_builtin): Modify to call aarch64_expand_builtin_faminmax. * config/aarch64/aarch64-option-extensions.def (AARCH64_OPT_EXTENSION): Introduce new flag for this extension. * config/aarch64/aarch64-simd.md (aarch64_<faminmax_uns_op><mode>): Instruction pattern for faminmax intrinsics. * config/aarch64/aarch64.h (TARGET_FAMINMAX): Introduce new flag for this extension. * config/aarch64/iterators.md: Introduce new iterators for faminmax intrinsics. * config/arm/types.md: Introduce neon_fp_aminmax<q> attributes. * doc/invoke.texi: Document extension in AArch64 Options. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/faminmax-builtins-no-flag.c: New test. * gcc.target/aarch64/simd/faminmax-builtins.c: New test. --- gcc/config/aarch64/aarch64-builtins.cc | 126 ++++++++++++++++++ .../aarch64/aarch64-option-extensions.def | 2 + gcc/config/aarch64/aarch64-simd.md | 11 ++ gcc/config/aarch64/aarch64.h | 4 + gcc/config/aarch64/iterators.md | 9 ++ gcc/config/arm/types.md | 6 + gcc/doc/invoke.texi | 2 + .../aarch64/simd/faminmax-builtins-no-flag.c | 10 ++ .../aarch64/simd/faminmax-builtins.c | 115 ++++++++++++++++ 9 files changed, 285 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index eb878b933fe..95ec8b6cccc 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -829,6 +829,17 @@ enum aarch64_builtins AARCH64_RBIT, AARCH64_RBITL, AARCH64_RBITLL, + /* FAMINMAX builtins. */ + AARCH64_FAMINMAX_BUILTIN_FAMAX4H, + AARCH64_FAMINMAX_BUILTIN_FAMAX8H, + AARCH64_FAMINMAX_BUILTIN_FAMAX2S, + AARCH64_FAMINMAX_BUILTIN_FAMAX4S, + AARCH64_FAMINMAX_BUILTIN_FAMAX2D, + AARCH64_FAMINMAX_BUILTIN_FAMIN4H, + AARCH64_FAMINMAX_BUILTIN_FAMIN8H, + AARCH64_FAMINMAX_BUILTIN_FAMIN2S, + AARCH64_FAMINMAX_BUILTIN_FAMIN4S, + AARCH64_FAMINMAX_BUILTIN_FAMIN2D, /* System register builtins. */ AARCH64_RSR, AARCH64_RSRP, @@ -1547,6 +1558,66 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma) } } +/* Initialize the absolute maximum/minimum (FAMINMAX) builtins. */ + +typedef struct +{ + const char *name; + unsigned int code; + tree eltype; + machine_mode mode; +} faminmax_builtins_data; + +static void +aarch64_init_faminmax_builtins () +{ + faminmax_builtins_data data[] = { + /* Absolute maximum. */ + {"vamax_f16", AARCH64_FAMINMAX_BUILTIN_FAMAX4H, + aarch64_simd_types[Float16x4_t].eltype, + aarch64_simd_types[Float16x4_t].mode}, + {"vamaxq_f16", AARCH64_FAMINMAX_BUILTIN_FAMAX8H, + aarch64_simd_types[Float16x8_t].eltype, + aarch64_simd_types[Float16x8_t].mode}, + {"vamax_f32", AARCH64_FAMINMAX_BUILTIN_FAMAX2S, + aarch64_simd_types[Float32x2_t].eltype, + aarch64_simd_types[Float32x2_t].mode}, + {"vamaxq_f32", AARCH64_FAMINMAX_BUILTIN_FAMAX4S, + aarch64_simd_types[Float32x4_t].eltype, + aarch64_simd_types[Float32x4_t].mode}, + {"vamaxq_f64", AARCH64_FAMINMAX_BUILTIN_FAMAX2D, + aarch64_simd_types[Float64x2_t].eltype, + aarch64_simd_types[Float64x2_t].mode}, + /* Absolute minimum. */ + {"vamin_f16", AARCH64_FAMINMAX_BUILTIN_FAMIN4H, + aarch64_simd_types[Float16x4_t].eltype, + aarch64_simd_types[Float16x4_t].mode}, + {"vaminq_f16", AARCH64_FAMINMAX_BUILTIN_FAMIN8H, + aarch64_simd_types[Float16x8_t].eltype, + aarch64_simd_types[Float16x8_t].mode}, + {"vamin_f32", AARCH64_FAMINMAX_BUILTIN_FAMIN2S, + aarch64_simd_types[Float32x2_t].eltype, + aarch64_simd_types[Float32x2_t].mode}, + {"vaminq_f32", AARCH64_FAMINMAX_BUILTIN_FAMIN4S, + aarch64_simd_types[Float32x4_t].eltype, + aarch64_simd_types[Float32x4_t].mode}, + {"vaminq_f64", AARCH64_FAMINMAX_BUILTIN_FAMIN2D, + aarch64_simd_types[Float64x2_t].eltype, + aarch64_simd_types[Float64x2_t].mode}, + }; + + for (size_t i = 0; i < ARRAY_SIZE (data); ++i) + { + tree type + = build_vector_type (data[i].eltype, GET_MODE_NUNITS (data[i].mode)); + tree fntype = build_function_type_list (type, type, type, NULL_TREE); + unsigned int code = data[i].code; + const char *name = data[i].name; + aarch64_builtin_decls[code] + = aarch64_general_simulate_builtin (name, fntype, code); + } +} + /* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type indexed by TYPE_INDEX. */ static void @@ -1640,6 +1711,7 @@ handle_arm_neon_h (void) aarch64_init_simd_builtin_functions (true); aarch64_init_simd_intrinsics (); + aarch64_init_faminmax_builtins (); } static void @@ -2317,6 +2389,19 @@ aarch64_general_check_builtin_call (location_t location, vec<location_t>, return aarch64_check_required_extensions (location, decl, AARCH64_FL_LS64); + case AARCH64_FAMINMAX_BUILTIN_FAMAX4H: + case AARCH64_FAMINMAX_BUILTIN_FAMAX8H: + case AARCH64_FAMINMAX_BUILTIN_FAMAX2S: + case AARCH64_FAMINMAX_BUILTIN_FAMAX4S: + case AARCH64_FAMINMAX_BUILTIN_FAMAX2D: + case AARCH64_FAMINMAX_BUILTIN_FAMIN4H: + case AARCH64_FAMINMAX_BUILTIN_FAMIN8H: + case AARCH64_FAMINMAX_BUILTIN_FAMIN2S: + case AARCH64_FAMINMAX_BUILTIN_FAMIN4S: + case AARCH64_FAMINMAX_BUILTIN_FAMIN2D: + return aarch64_check_required_extensions (location, decl, + AARCH64_FL_FAMINMAX); + default: break; } @@ -3189,6 +3274,44 @@ aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target) return ops[0].value; } +static rtx +aarch64_expand_builtin_faminmax (unsigned int fcode, tree exp, rtx target) +{ + machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); + rtx op0 = force_reg (mode, expand_normal (CALL_EXPR_ARG (exp, 0))); + rtx op1 = force_reg (mode, expand_normal (CALL_EXPR_ARG (exp, 1))); + + enum insn_code icode; + if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMAX4H) + icode = CODE_FOR_aarch64_famaxv4hf; + else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMAX8H) + icode = CODE_FOR_aarch64_famaxv8hf; + else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMAX2S) + icode = CODE_FOR_aarch64_famaxv2sf; + else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMAX4S) + icode = CODE_FOR_aarch64_famaxv4sf; + else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMAX2D) + icode = CODE_FOR_aarch64_famaxv2df; + else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMIN4H) + icode = CODE_FOR_aarch64_faminv4hf; + else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMIN8H) + icode = CODE_FOR_aarch64_faminv8hf; + else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMIN2S) + icode = CODE_FOR_aarch64_faminv2sf; + else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMIN4S) + icode = CODE_FOR_aarch64_faminv4sf; + else if (fcode == AARCH64_FAMINMAX_BUILTIN_FAMIN2D) + icode = CODE_FOR_aarch64_faminv2df; + else + gcc_unreachable (); + + rtx pat = GEN_FCN (icode) (target, op0, op1); + + emit_insn (pat); + + return target; +} + /* Expand an expression EXP as fpsr or fpcr setter (depending on UNSPEC) using MODE. */ static void @@ -3368,6 +3491,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, if (fcode >= AARCH64_REV16 && fcode <= AARCH64_RBITLL) return aarch64_expand_builtin_data_intrinsic (fcode, exp, target); + if (fcode >= AARCH64_FAMINMAX_BUILTIN_FAMAX4H + && fcode <= AARCH64_FAMINMAX_BUILTIN_FAMIN2D) + return aarch64_expand_builtin_faminmax (fcode, exp, target); gcc_unreachable (); } diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 6998627f377..8279f5a76ea 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -234,6 +234,8 @@ AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs") AARCH64_OPT_EXTENSION("fp8", FP8, (SIMD), (), (), "fp8") +AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax") + #undef AARCH64_OPT_FMV_EXTENSION #undef AARCH64_OPT_EXTENSION #undef AARCH64_FMV_FEATURE diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 23c03a96371..488e27c36a9 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -9910,3 +9910,14 @@ "shl\\t%d0, %d1, #16" [(set_attr "type" "neon_shift_imm")] ) + +;; faminmax +(define_insn "aarch64_<faminmax_uns_op><mode>" + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w") + (match_operand:VHSDF 2 "register_operand" "w")] + FAMINMAX_UNS))] + "TARGET_FAMINMAX" + "<faminmax_uns_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_fp_aminmax<q>")] +) diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 2dfb999bea5..de14f57071a 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -457,6 +457,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED enabled through +gcs. */ #define TARGET_GCS AARCH64_HAVE_ISA (GCS) +/* Floating Point Absolute Maximum/Minimum extension instructions are + enabled through +faminmax. */ +#define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX) + /* Prefer different predicate registers for the output of a predicated operation over re-using an existing input predicate. */ #define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 20a318e023b..17ac5e073aa 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1057,6 +1057,8 @@ UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. UNSPEC_BFCVT ; Used in aarch64-simd.md. UNSPEC_FCVTXN ; Used in aarch64-simd.md. + UNSPEC_FAMAX ; Used in aarch64-simd.md. + UNSPEC_FAMIN ; Used in aarch64-simd.md. ;; All used in aarch64-sve2.md UNSPEC_FCVTN @@ -4463,3 +4465,10 @@ (UNSPECV_SET_FPCR "fpcr")]) (define_int_attr bits_etype [(8 "b") (16 "h") (32 "s") (64 "d")]) + +;; Iterators and attributes for faminmax + +(define_int_iterator FAMINMAX_UNS [UNSPEC_FAMAX UNSPEC_FAMIN]) + +(define_int_attr faminmax_uns_op + [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")]) diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md index 9527bdb9e87..d8de9dbc9d1 100644 --- a/gcc/config/arm/types.md +++ b/gcc/config/arm/types.md @@ -492,6 +492,8 @@ ; neon_fp_reduc_minmax_s_q ; neon_fp_reduc_minmax_d ; neon_fp_reduc_minmax_d_q +; neon_fp_aminmax +; neon_fp_aminmax_q ; neon_fp_cvt_narrow_s_q ; neon_fp_cvt_narrow_d_q ; neon_fp_cvt_widen_h @@ -1044,6 +1046,8 @@ neon_fp_reduc_minmax_d,\ neon_fp_reduc_minmax_d_q,\ \ + neon_fp_aminmax,\ + neon_fp_aminmax_q,\ neon_fp_cvt_narrow_s_q,\ neon_fp_cvt_narrow_d_q,\ neon_fp_cvt_widen_h,\ @@ -1264,6 +1268,8 @@ neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d,\ neon_fp_reduc_minmax_d_q,\ + neon_fp_aminmax, neon_fp_aminmax_q,\ + neon_fp_aminmax, neon_fp_aminmax_q,\ neon_fp_cvt_narrow_s_q, neon_fp_cvt_narrow_d_q,\ neon_fp_cvt_widen_h, neon_fp_cvt_widen_s, neon_fp_to_int_s,\ neon_fp_to_int_s_q, neon_int_to_fp_s, neon_int_to_fp_s_q,\ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 32b772d2a8a..2c509f62d98 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21865,6 +21865,8 @@ Enable support for Armv8.9-a/9.4-a translation hardening extension. Enable the RCpc3 (Release Consistency) extension. @item fp8 Enable the fp8 (8-bit floating point) extension. +@item faminmax +Enable the Floating Point Absolute Maximum/Minimum extension. @end table diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c new file mode 100644 index 00000000000..63ed1508c23 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c @@ -0,0 +1,10 @@ +/* { dg-do assemble} */ +/* { dg-additional-options "-march=armv9-a" } */ + +#include "arm_neon.h" + +void +test (float32x4_t a, float32x4_t b) +{ + vamaxq_f32 (a, b); /* { dg-error {ACLE function 'vamaxq_f32' requires ISA extension 'faminmax'} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c new file mode 100644 index 00000000000..7e4f3eba81a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c @@ -0,0 +1,115 @@ +/* { dg-do assemble} */ +/* { dg-additional-options "-O3 -march=armv9-a+faminmax" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_neon.h" + +/* +** test_vamax_f16: +** famax v0.4h, v0.4h, v1.4h +** ret +*/ +float16x4_t +test_vamax_f16 (float16x4_t a, float16x4_t b) +{ + return vamax_f16 (a, b); +} + +/* +** test_vamaxq_f16: +** famax v0.8h, v0.8h, v1.8h +** ret +*/ +float16x8_t +test_vamaxq_f16 (float16x8_t a, float16x8_t b) +{ + return vamaxq_f16 (a, b); +} + +/* +** test_vamax_f32: +** famax v0.2s, v0.2s, v1.2s +** ret +*/ +float32x2_t +test_vamax_f32 (float32x2_t a, float32x2_t b) +{ + return vamax_f32 (a, b); +} + +/* +** test_vamaxq_f32: +** famax v0.4s, v0.4s, v1.4s +** ret +*/ +float32x4_t +test_vamaxq_f32 (float32x4_t a, float32x4_t b) +{ + return vamaxq_f32 (a, b); +} + +/* +** test_vamaxq_f64: +** famax v0.2d, v0.2d, v1.2d +** ret +*/ +float64x2_t +test_vamaxq_f64 (float64x2_t a, float64x2_t b) +{ + return vamaxq_f64 (a, b); +} + +/* +** test_vamin_f16: +** famin v0.4h, v0.4h, v1.4h +** ret +*/ +float16x4_t +test_vamin_f16 (float16x4_t a, float16x4_t b) +{ + return vamin_f16 (a, b); +} + +/* +** test_vaminq_f16: +** famin v0.8h, v0.8h, v1.8h +** ret +*/ +float16x8_t +test_vaminq_f16 (float16x8_t a, float16x8_t b) +{ + return vaminq_f16 (a, b); +} + +/* +** test_vamin_f32: +** famin v0.2s, v0.2s, v1.2s +** ret +*/ +float32x2_t +test_vamin_f32 (float32x2_t a, float32x2_t b) +{ + return vamin_f32 (a, b); +} + +/* +** test_vaminq_f32: +** famin v0.4s, v0.4s, v1.4s +** ret +*/ +float32x4_t +test_vaminq_f32 (float32x4_t a, float32x4_t b) +{ + return vaminq_f32 (a, b); +} + +/* +** test_vaminq_f64: +** famin v0.2d, v0.2d, v1.2d +** ret +*/ +float64x2_t +test_vaminq_f64 (float64x2_t a, float64x2_t b) +{ + return vaminq_f64 (a, b); +}