https://gcc.gnu.org/g:ef533d234293585444d3fce7f4eea75699a25c20
commit r16-6380-gef533d234293585444d3fce7f4eea75699a25c20
Author: Claudio Bantaloukas <[email protected]>
Date:   Wed Dec 24 11:41:25 2025 +0000

    aarch64: add narrowing sme2 conversions to fp8

    This patch adds the following intrinsics (all __arm_streaming only)
    along with asm tests for them.

    BFCVT, FCVT
    Convert to packed 8-bit floating-point format:
    - svmfloat8_t svcvt_mf8[_f16_x2]_fpm(svfloat16x2_t zn, fpm_t fpm)
    - svmfloat8_t svcvt_mf8[_bf16_x2]_fpm(svbfloat16x2_t zn, fpm_t fpm)
    - svmfloat8_t svcvt_mf8[_f32_x4]_fpm(svfloat32x4_t zn, fpm_t fpm)

    FCVTN
    Convert to interleaved 8-bit floating-point format.
    - svmfloat8_t svcvtn_mf8[_f32_x4]_fpm(svfloat32x4_t zn, fpm_t fpm)

    gcc/
        * config/aarch64/aarch64-sve-builtins-base.cc (svcvt_impl): Update
        to handle fp8 cases.
        * config/aarch64/aarch64-sve-builtins-sve2.def (svcvt, svcvtn):
        Added DEF_SVE_FUNCTION_GS_FPM instances.
        * config/aarch64/aarch64-sve2.md (@aarch64_sve2_fp8_cvtn<mode>):
        Updated define_insn for additional case.
        (@aarch64_sme2_fp8_cvt<mode>): Added new define_insn.
        * config/aarch64/iterators.md (VNx16F_NARROW): Added new iterator
        to handle narrowing SVE floating point operations.
        (UNSPEC_FCVT): Added new unspec.

    gcc/testsuite/
        * gcc.target/aarch64/sme2/acle-asm/cvt_mf8_bf16_x2.c: Added test file.
        * gcc.target/aarch64/sme2/acle-asm/cvt_mf8_f16_x2.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/cvt_mf8_f32_x4.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/cvtn_mf8_f32_x4.c: Likewise.
        * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h (TEST_X2_NARROW):
        Added fpm0 argument for intrinsics.
        (TEST_X4_NARROW): Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc    | 26 +++++---
 gcc/config/aarch64/aarch64-sve-builtins-sve2.def   |  3 +
 gcc/config/aarch64/aarch64-sve2.md                 | 14 ++++-
 gcc/config/aarch64/iterators.md                    |  4 ++
 .../aarch64/sme2/acle-asm/cvt_mf8_bf16_x2.c        | 56 +++++++++++++++++
 .../aarch64/sme2/acle-asm/cvt_mf8_f16_x2.c         | 56 +++++++++++++++++
 .../aarch64/sme2/acle-asm/cvt_mf8_f32_x4.c         | 72 ++++++++++++++++++++++
 .../aarch64/sme2/acle-asm/cvtn_mf8_f32_x4.c        | 72 ++++++++++++++++++++++
 .../aarch64/sve/acle/asm/test_sve_acle.h           |  2 +
 9 files changed, 293 insertions(+), 12 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index ecc06877cac5..622485effb38 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -779,17 +779,23 @@ public:
   {
     machine_mode mode0 = e.result_mode ();
     machine_mode mode1 = GET_MODE (e.args[0]);
-    convert_optab optab;
-    if (e.type_suffix (0).integer_p)
-      optab = e.type_suffix (0).unsigned_p ? ufix_optab : sfix_optab;
-    else if (e.type_suffix (1).integer_p)
-      optab = e.type_suffix (1).unsigned_p ? ufloat_optab : sfloat_optab;
-    else if (e.type_suffix (0).element_bits
-             < e.type_suffix (1).element_bits)
-      optab = trunc_optab;
+    if (e.fpm_mode == aarch64_sve::FPM_set)
+      icode = code_for_aarch64_sme2_fp8_cvt (mode1);
     else
-      optab = sext_optab;
-    icode = convert_optab_handler (optab, mode0, mode1);
+      {
+        convert_optab optab;
+        if (e.type_suffix (0).integer_p)
+          optab = e.type_suffix (0).unsigned_p ? ufix_optab : sfix_optab;
+        else if (e.type_suffix (1).integer_p)
+          optab = e.type_suffix (1).unsigned_p ? ufloat_optab
+                                               : sfloat_optab;
+        else if (e.type_suffix (0).element_bits
+                 < e.type_suffix (1).element_bits)
+          optab = trunc_optab;
+        else
+          optab = sext_optab;
+        icode = convert_optab_handler (optab, mode0, mode1);
+      }
     gcc_assert (icode != CODE_FOR_nothing);
     return e.use_exact_insn (icode);
   }
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index c271b97de87c..869e006ffdeb 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -421,6 +421,9 @@ DEF_SVE_FUNCTION_GS_FPM (svdot_lane, ternary_mfloat8_lane_group_selection, h_flo

 #define REQUIRED_EXTENSIONS \
   streaming_only (AARCH64_FL_SME2 | AARCH64_FL_FP8)
+DEF_SVE_FUNCTION_GS_FPM (svcvt, unary_convertxn_narrow, cvtn_mf8, x2, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svcvt, unary_convertxn_narrow, cvtnx_mf8, x4, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svcvtn, unary_convertxn_narrow, cvtnx_mf8, x4, none, set)
 DEF_SVE_FUNCTION_GS_FPM (svcvt1, unary_convert, cvt_mf8, x2, none, set)
 DEF_SVE_FUNCTION_GS_FPM (svcvt2, unary_convert, cvt_mf8, x2, none, set)
 DEF_SVE_FUNCTION_GS_FPM (svcvtl1, unary_convert, cvt_mf8, x2, none, set)
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index ab8098d33278..d1981b9b833d 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -3635,10 +3635,10 @@
 (define_insn "@aarch64_sve2_fp8_cvtn<mode>"
   [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
-         [(match_operand:SVE_FULL_HFx2 1 "aligned_register_operand" "Uw2")
+         [(match_operand:VNx16F_NARROW 1 "aligned_register_operand" "Uw<vector_count>")
          (reg:DI FPM_REGNUM)]
         UNSPEC_FP8FCVTN))]
-  "TARGET_SSVE_FP8"
+  "<MODE>mode == VNx16SFmode ? TARGET_SSME2_FP8 : TARGET_SSVE_FP8"
   "<b>fcvtn\t%0.b, %1"
   [(set_attr "sve_type" "sve_fp_cvt")]
 )
@@ -3666,6 +3666,16 @@
   [(set_attr "sve_type" "sve_fp_cvt")]
 )

+(define_insn "@aarch64_sme2_fp8_cvt<mode>"
+  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+       (unspec:VNx16QI
+         [(match_operand:VNx16F_NARROW 1 "aligned_register_operand" "Uw<vector_count>")
+          (reg:DI FPM_REGNUM)]
+         UNSPEC_FCVT))]
+  "TARGET_SSME2_FP8"
+  "<b>fcvt\t%0.b, %1"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP<-INT] Multi-vector conversions
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 026c3101e38d..7ec731d2d6b4 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -712,6 +712,9 @@
 (define_mode_iterator VNx2_NARROW [VNx2QI VNx2HI VNx2SI])
 (define_mode_iterator VNx2_WIDE [VNx2DI])

+;; Used for narrowing SVE floating point operations.
+(define_mode_iterator VNx16F_NARROW [SVE_FULL_HFx2 VNx16SF])
+
 ;; All SVE predicate modes.
 (define_mode_iterator PRED_ALL [VNx16BI VNx8BI VNx4BI VNx2BI])

@@ -1061,6 +1064,7 @@
     UNSPEC_F2CVTL       ; Used in aarch64-sve2.md.
     UNSPEC_F2CVTLT      ; Used in aarch64-sve2.md.
     UNSPEC_FADDP        ; Used in aarch64-sve2.md.
+    UNSPEC_FCVT         ; Used in aarch64-sve2.md.
     UNSPEC_FCVTNB       ; Used in aarch64-sve2.md.
     UNSPEC_FCVTNT       ; Used in aarch64-sve2.md.
     UNSPEC_FMAXNMP      ; Used in aarch64-sve2.md.
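For context before the testsuite changes, a minimal usage sketch of the new intrinsics (illustrative only, not part of the commit): the function names below are hypothetical, and the sketch assumes a compiler containing this patch with SME2 and FP8 enabled (for example -march=armv9-a+sme2+fp8, plus +bf16 for the bfloat16 variant).

#include <arm_sme.h>

/* Hypothetical examples; both functions must be called in streaming mode.  */

svmfloat8_t
narrow_f16x2 (svfloat16x2_t zn, fpm_t fpm) __arm_streaming
{
  /* Packed conversion of an x2 tuple of f16: the FCVT form,
     i.e. svcvt_mf8[_f16_x2]_fpm via the overloaded spelling.  */
  return svcvt_mf8_fpm (zn, fpm);
}

svmfloat8_t
narrow_f32x4 (svfloat32x4_t zn, fpm_t fpm) __arm_streaming
{
  /* Interleaved conversion of an x4 tuple of f32: the FCVTN form,
     i.e. svcvtn_mf8[_f32_x4]_fpm via the overloaded spelling.  */
  return svcvtn_mf8_fpm (zn, fpm);
}

The fpm argument is the 64-bit value that the compiler moves into FPMR before the conversion (hence the "msr fpmr, x0" lines matched by the tests below); here it is simply taken as a parameter rather than constructed.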
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_mf8_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_mf8_bf16_x2.c
new file mode 100644
index 000000000000..bdda0fd36d67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_mf8_bf16_x2.c
@@ -0,0 +1,56 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+fp8+bf16"
+#include "test_sme2_acle.h"
+
+/*
+** cvt_z0_z0:
+**	msr	fpmr, x0
+**	bfcvt	z0\.b, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_X2_NARROW (cvt_z0_z0, svbfloat16x2_t, svmfloat8_t,
+		z0_res = svcvt_mf8_bf16_x2_fpm (z0, fpm0),
+		z0_res = svcvt_mf8_fpm (z0, fpm0))
+
+/*
+** cvt_z0_z6:
+**	msr	fpmr, x0
+**	bfcvt	z0\.b, {z6\.h - z7\.h}
+**	ret
+*/
+TEST_X2_NARROW (cvt_z0_z6, svbfloat16x2_t, svmfloat8_t,
+		z0_res = svcvt_mf8_bf16_x2_fpm (z6, fpm0),
+		z0_res = svcvt_mf8_fpm (z6, fpm0))
+
+/*
+** cvt_z0_z29:
+**	msr	fpmr, x0
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	bfcvt	z0\.b, [^\n]+
+**	ret
+*/
+TEST_X2_NARROW (cvt_z0_z29, svbfloat16x2_t, svmfloat8_t,
+		z0_res = svcvt_mf8_bf16_x2_fpm (z29, fpm0),
+		z0_res = svcvt_mf8_fpm (z29, fpm0))
+
+/*
+** cvt_z5_z0:
+**	msr	fpmr, x0
+**	bfcvt	z5\.b, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_X2_NARROW (cvt_z5_z0, svbfloat16x2_t, svmfloat8_t,
+		z5 = svcvt_mf8_bf16_x2_fpm (z0, fpm0),
+		z5 = svcvt_mf8_fpm (z0, fpm0))
+
+/*
+** cvt_z22_z16:
+**	msr	fpmr, x0
+**	bfcvt	z22\.b, {z16\.h - z17\.h}
+**	ret
+*/
+TEST_X2_NARROW (cvt_z22_z16, svbfloat16x2_t, svmfloat8_t,
+		z22 = svcvt_mf8_bf16_x2_fpm (z16, fpm0),
+		z22 = svcvt_mf8_fpm (z16, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_mf8_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_mf8_f16_x2.c
new file mode 100644
index 000000000000..93792e909450
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_mf8_f16_x2.c
@@ -0,0 +1,56 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+fp8"
+
+/*
+** cvt_z0_z0:
+**	msr	fpmr, x0
+**	fcvt	z0\.b, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_X2_NARROW (cvt_z0_z0, svfloat16x2_t, svmfloat8_t,
+		z0_res = svcvt_mf8_f16_x2_fpm (z0, fpm0),
+		z0_res = svcvt_mf8_fpm (z0, fpm0))
+
+/*
+** cvt_z0_z6:
+**	msr	fpmr, x0
+**	fcvt	z0\.b, {z6\.h - z7\.h}
+**	ret
+*/
+TEST_X2_NARROW (cvt_z0_z6, svfloat16x2_t, svmfloat8_t,
+		z0_res = svcvt_mf8_f16_x2_fpm (z6, fpm0),
+		z0_res = svcvt_mf8_fpm (z6, fpm0))
+
+/*
+** cvt_z0_z29:
+**	msr	fpmr, x0
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fcvt	z0\.b, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_X2_NARROW (cvt_z0_z29, svfloat16x2_t, svmfloat8_t,
+		z0_res = svcvt_mf8_f16_x2_fpm (z29, fpm0),
+		z0_res = svcvt_mf8_fpm (z29, fpm0))
+
+/*
+** cvt_z5_z0:
+**	msr	fpmr, x0
+**	fcvt	z5\.b, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_X2_NARROW (cvt_z5_z0, svfloat16x2_t, svmfloat8_t,
+		z5 = svcvt_mf8_f16_x2_fpm (z0, fpm0),
+		z5 = svcvt_mf8_fpm (z0, fpm0))
+
+/*
+** cvt_z22_z16:
+**	msr	fpmr, x0
+**	fcvt	z22\.b, {z16\.h - z17\.h}
+**	ret
+*/
+TEST_X2_NARROW (cvt_z22_z16, svfloat16x2_t, svmfloat8_t,
+		z22 = svcvt_mf8_f16_x2_fpm (z16, fpm0),
+		z22 = svcvt_mf8_fpm (z16, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_mf8_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_mf8_f32_x4.c
new file mode 100644
index 000000000000..a9ee10de0df1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_mf8_f32_x4.c
@@ -0,0 +1,72 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+fp8"
+
+/*
+** cvt_z0_z0:
+**	msr	fpmr, x0
+**	fcvt	z0\.b, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_X4_NARROW (cvt_z0_z0, svfloat32x4_t, svmfloat8_t,
+		z0_res = svcvt_mf8_f32_x4_fpm (z0, fpm0),
+		z0_res = svcvt_mf8_fpm (z0, fpm0))
+
+/*
+** cvt_z0_z4:
+**	msr	fpmr, x0
+**	fcvt	z0\.b, {z4\.s - z7\.s}
+**	ret
+*/
+TEST_X4_NARROW (cvt_z0_z4, svfloat32x4_t, svmfloat8_t,
+		z0_res = svcvt_mf8_f32_x4_fpm (z4, fpm0),
+		z0_res = svcvt_mf8_fpm (z4, fpm0))
+
+/*
+** cvt_z0_z21:
+**	msr	fpmr, x0
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fcvt	z0\.b, [^\n]+
+**	ret
+*/
+TEST_X4_NARROW (cvt_z0_z21, svfloat32x4_t, svmfloat8_t,
+		z0_res = svcvt_mf8_f32_x4_fpm (z21, fpm0),
+		z0_res = svcvt_mf8_fpm (z21, fpm0))
+
+/*
+** cvt_z25_z26:
+**	msr	fpmr, x0
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fcvt	z25\.b, {z28\.s - z31\.s}
+**	ret
+*/
+TEST_X4_NARROW (cvt_z25_z26, svfloat32x4_t, svmfloat8_t,
+		z25 = svcvt_mf8_f32_x4_fpm (z26, fpm0),
+		z25 = svcvt_mf8_fpm (z26, fpm0))
+
+/*
+** cvt_z25_z0:
+**	msr	fpmr, x0
+**	fcvt	z25\.b, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_X4_NARROW (cvt_z25_z0, svfloat32x4_t, svmfloat8_t,
+		z25 = svcvt_mf8_f32_x4_fpm (z0, fpm0),
+		z25 = svcvt_mf8_fpm (z0, fpm0))
+
+/*
+** cvt_z22_z16:
+**	msr	fpmr, x0
+**	fcvt	z22\.b, {z16\.s - z19\.s}
+**	ret
+*/
+TEST_X4_NARROW (cvt_z22_z16, svfloat32x4_t, svmfloat8_t,
+		z22_res = svcvt_mf8_f32_x4_fpm (z16, fpm0),
+		z22_res = svcvt_mf8_fpm (z16, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtn_mf8_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtn_mf8_f32_x4.c
new file mode 100644
index 000000000000..2d0bd7eda0ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtn_mf8_f32_x4.c
@@ -0,0 +1,72 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+fp8"
+
+/*
+** cvtn_z0_z0:
+**	msr	fpmr, x0
+**	fcvtn	z0\.b, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_X4_NARROW (cvtn_z0_z0, svfloat32x4_t, svmfloat8_t,
+		z0_res = svcvtn_mf8_f32_x4_fpm (z0, fpm0),
+		z0_res = svcvtn_mf8_fpm (z0, fpm0))
+
+/*
+** cvtn_z0_z4:
+**	msr	fpmr, x0
+**	fcvtn	z0\.b, {z4\.s - z7\.s}
+**	ret
+*/
+TEST_X4_NARROW (cvtn_z0_z4, svfloat32x4_t, svmfloat8_t,
+		z0_res = svcvtn_mf8_f32_x4_fpm (z4, fpm0),
+		z0_res = svcvtn_mf8_fpm (z4, fpm0))
+
+/*
+** cvtn_z0_z21:
+**	msr	fpmr, x0
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fcvtn	z0\.b, [^\n]+
+**	ret
+*/
+TEST_X4_NARROW (cvtn_z0_z21, svfloat32x4_t, svmfloat8_t,
+		z0_res = svcvtn_mf8_f32_x4_fpm (z21, fpm0),
+		z0_res = svcvtn_mf8_fpm (z21, fpm0))
+
+/*
+** cvtn_z25_z26:
+**	msr	fpmr, x0
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fcvtn	z25\.b, {z28\.s - z31\.s}
+**	ret
+*/
+TEST_X4_NARROW (cvtn_z25_z26, svfloat32x4_t, svmfloat8_t,
+		z25 = svcvtn_mf8_f32_x4_fpm (z26, fpm0),
+		z25 = svcvtn_mf8_fpm (z26, fpm0))
+
+/*
+** cvtn_z25_z0:
+**	msr	fpmr, x0
+**	fcvtn	z25\.b, {z0\.s - z3\.s}
+**	ret
+*/
+TEST_X4_NARROW (cvtn_z25_z0, svfloat32x4_t, svmfloat8_t,
+		z25 = svcvtn_mf8_f32_x4_fpm (z0, fpm0),
+		z25 = svcvtn_mf8_fpm (z0, fpm0))
+
+/*
+** cvtn_z22_z16:
+**	msr	fpmr, x0
+**	fcvtn	z22\.b, {z16\.s - z19\.s}
+**	ret
+*/
+TEST_X4_NARROW (cvtn_z22_z16, svfloat32x4_t, svmfloat8_t,
+		z22_res = svcvtn_mf8_f32_x4_fpm (z16, fpm0),
+		z22_res = svcvtn_mf8_fpm (z16, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
index 7c156c4cf2a7..8d4ed537c871 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
@@ -733,6 +733,7 @@
 #define TEST_X2_NARROW(NAME, TTYPE, ZTYPE, CODE1, CODE2)	\
   PROTO (NAME, void, ())					\
   {								\
+    register fpm_t fpm0 __asm ("x0");				\
     register TTYPE z0 __asm ("z0");				\
     register ZTYPE z5 __asm ("z5");				\
     register TTYPE z6 __asm ("z6");				\
@@ -749,6 +750,7 @@
 #define TEST_X4_NARROW(NAME, TTYPE, ZTYPE, CODE1, CODE2)	\
   PROTO (NAME, void, ())					\
   {								\
+    register fpm_t fpm0 __asm ("x0");				\
     register TTYPE z0 __asm ("z0");				\
     register TTYPE z4 __asm ("z4");				\
     register TTYPE z16 __asm ("z16");				\
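As a closing sketch (again illustrative, not part of the commit): the converted svmfloat8_t is an ordinary single SVE vector, so it can be combined with the pre-existing FP8 vector type and its load/store intrinsics. The helper name below is hypothetical, and the sketch assumes those FP8 svst1/svcreate4/svptrue_b8 overloads are available and that SME2+FP8 code generation is enabled.

#include <arm_sme.h>

/* Hypothetical streaming helper: interleave-narrow four f32 vectors to FP8
   (FCVTN) and store the resulting byte vector.  */
void
narrow_and_store (mfloat8_t *dst, svfloat32_t a, svfloat32_t b,
                  svfloat32_t c, svfloat32_t d, fpm_t fpm) __arm_streaming
{
  svfloat32x4_t zn = svcreate4 (a, b, c, d);      /* build the x4 tuple */
  svmfloat8_t packed = svcvtn_mf8_fpm (zn, fpm);  /* FCVTN, formats from fpm */
  svst1 (svptrue_b8 (), dst, packed);             /* store the FP8 bytes */
}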
