https://gcc.gnu.org/g:d2a09a3d3b563d266a88eb7773dc45f2ab4bdea6
commit r16-6385-gd2a09a3d3b563d266a88eb7773dc45f2ab4bdea6 Author: Karl Meakin <[email protected]> Date: Wed Dec 24 11:41:27 2025 +0000 aarch64: add 8-bit floating point dot product This patch adds support for the following intrinsics when sme-f8f16 is enabled: * svdot_za16[_mf8]_vg1x2_fpm * svdot_za16[_mf8]_vg1x4_fpm * svdot[_single]_za16[_mf8]_vg1x2_fpm * svdot[_single]_za16[_mf8]_vg1x4_fpm * svdot_lane_za16[_mf8]_vg1x2_fpm * svdot_lane_za16[_mf8]_vg1x4_fpm This patch adds support for the following intrinsics when sme-f8f32 is enabled: * svdot_za32[_mf8]_vg1x2_fpm * svdot_za32[_mf8]_vg1x4_fpm * svdot[_single]_za32[_mf8]_vg1x2_fpm * svdot[_single]_za32[_mf8]_vg1x4_fpm * svdot_lane_za32[_mf8]_vg1x2_fpm * svdot_lane_za32[_mf8]_vg1x4_fpm * svvdot_lane_za32[_mf8]_vg1x2_fpm * svvdotb_lane_za32[_mf8]_vg1x4_fpm * svvdott_lane_za32[_mf8]_vg1x4_fpm gcc: * config/aarch64/aarch64-sme.md (@aarch64_sme_<optab><SME_ZA_F8F16_32:mode><SME_ZA_FP8_x24:mode>): New insn. (@aarch64_fvdot_half<optab>): Likewise. (@aarch64_fvdot_half<optab>_plus): Likewise. * config/aarch64/aarch64-sve-builtins-functions.h (class svvdot_half_impl): New function impl. * config/aarch64/aarch64-sve-builtins-sme.cc (FUNCTION): Likewise. * config/aarch64/aarch64-sve-builtins-shapes.cc (struct dot_half_za_slice_lane_def): New function shape. * config/aarch64/aarch64-sve-builtins-shapes.h: Likewise. * config/aarch64/aarch64-sve-builtins-sme.def (svdot): New function. (svdot_lane): Likewise. (svvdot_lane): Likewise. (svvdotb_lane): Likewise. (svvdott_lane): Likewise. * config/aarch64/aarch64-sve-builtins-sme.h (svvdotb_lane_za): New function. (svvdott_lane_za): Likewise. * config/aarch64/aarch64-sve-builtins.cc (TYPES_za_s_mf8): New types array. (TYPES_za_hs_mf8): Likewise. (za_hs_mf8): Likewise. * config/aarch64/iterators.md (SME_ZA_F8F16): New mode iterator. (SME_ZA_F8F32): Likewise. (SME_ZA_FP8_x1): Likewise. (SME_ZA_FP8_x2): Likewise. (SME_ZA_FP8_x4): Likewise. (UNSPEC_SME_FDOT_FP8): New unspec. 
(UNSPEC_SME_FVDOT_FP8): Likewise. (UNSPEC_SME_FVDOTT_FP8): Likewise. (UNSPEC_SME_FVDOTB_FP8): Likewise. (SME_FP8_DOTPROD): New int iterator. (SME_FP8_FVDOT): Likewise. (SME_FP8_FVDOT_HALF): Likewise. gcc/testsuite: * gcc.target/aarch64/sme2/acle-asm/dot_lane_za16_mf8_vg1x2.c: New test. * gcc.target/aarch64/sme2/acle-asm/dot_lane_za16_mf8_vg1x4.c: New test. * gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_mf8_vg1x2.c: New test. * gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_mf8_vg1x4.c: New test. * gcc.target/aarch64/sme2/acle-asm/dot_single_za16_mf8_vg1x2.c: New test. * gcc.target/aarch64/sme2/acle-asm/dot_single_za16_mf8_vg1x4.c: New test. * gcc.target/aarch64/sme2/acle-asm/dot_single_za32_mf8_vg1x2.c: New test. * gcc.target/aarch64/sme2/acle-asm/dot_single_za32_mf8_vg1x4.c: New test. * gcc.target/aarch64/sme2/acle-asm/dot_za16_mf8_vg1x2.c: New test. * gcc.target/aarch64/sme2/acle-asm/dot_za16_mf8_vg1x4.c: New test. * gcc.target/aarch64/sme2/acle-asm/dot_za32_mf8_vg1x2.c: New test. * gcc.target/aarch64/sme2/acle-asm/dot_za32_mf8_vg1x4.c: New test. * gcc.target/aarch64/sme2/acle-asm/vdot_lane_za16_mf8_vg1x2.c: New test. * gcc.target/aarch64/sme2/acle-asm/vdotb_lane_za32_mf8_vg1x4.c: New test. * gcc.target/aarch64/sme2/acle-asm/vdott_lane_za32_mf8_vg1x4.c: New test. * gcc.target/aarch64/sve/acle/general-c/dot_half_za_slice_lane_fpm.c: New test. 
Diff: --- gcc/config/aarch64/aarch64-sme.md | 299 +++++++++++++++++++++ .../aarch64/aarch64-sve-builtins-functions.h | 18 ++ gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 37 +++ gcc/config/aarch64/aarch64-sve-builtins-shapes.h | 1 + gcc/config/aarch64/aarch64-sve-builtins-sme.cc | 13 +- gcc/config/aarch64/aarch64-sve-builtins-sme.def | 27 ++ gcc/config/aarch64/aarch64-sve-builtins-sme.h | 2 + gcc/config/aarch64/aarch64-sve-builtins.cc | 9 + gcc/config/aarch64/iterators.md | 20 ++ .../sme2/acle-asm/dot_lane_za16_mf8_vg1x2.c | 119 ++++++++ .../sme2/acle-asm/dot_lane_za16_mf8_vg1x4.c | 125 +++++++++ .../sme2/acle-asm/dot_lane_za32_mf8_vg1x2.c | 119 ++++++++ .../sme2/acle-asm/dot_lane_za32_mf8_vg1x4.c | 125 +++++++++ .../sme2/acle-asm/dot_single_za16_mf8_vg1x2.c | 126 +++++++++ .../sme2/acle-asm/dot_single_za16_mf8_vg1x4.c | 126 +++++++++ .../sme2/acle-asm/dot_single_za32_mf8_vg1x2.c | 126 +++++++++ .../sme2/acle-asm/dot_single_za32_mf8_vg1x4.c | 126 +++++++++ .../aarch64/sme2/acle-asm/dot_za16_mf8_vg1x2.c | 150 +++++++++++ .../aarch64/sme2/acle-asm/dot_za16_mf8_vg1x4.c | 166 ++++++++++++ .../aarch64/sme2/acle-asm/dot_za32_mf8_vg1x2.c | 150 +++++++++++ .../aarch64/sme2/acle-asm/dot_za32_mf8_vg1x4.c | 166 ++++++++++++ .../sme2/acle-asm/vdot_lane_za16_mf8_vg1x2.c | 119 ++++++++ .../sme2/acle-asm/vdotb_lane_za32_mf8_vg1x4.c | 119 ++++++++ .../sme2/acle-asm/vdott_lane_za32_mf8_vg1x4.c | 119 ++++++++ .../acle/general-c/dot_half_za_slice_lane_fpm.c | 106 ++++++++ 25 files changed, 2510 insertions(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index 7201e0f0ec37..445c3d899a09 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -45,6 +45,7 @@ ;; ---- [INT] Ternary widening arithmetic on ZA slice ;; ---- [INT] Sum of outer products ;; ---- [FP] Dot product +;; ---- [FP8] Dot product ;; ---- [FP] Ternary arithmetic on ZA slice ;; ---- [FP] Ternary widening arithmetic on ZA slice ;; 
---- [FP] Sum of outer products @@ -1892,6 +1893,304 @@ "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3.h[%4]" ) +;; ------------------------------------------------------------------------- +;; ---- [FP8] Dot product +;; ------------------------------------------------------------------------- +;; Includes: +;; - FDOT (2-way, multiple vectors, FP8 to FP16) +;; - 2 ZA single-vectors (svdot_za16_mf8_vg1x2_fpm) +;; - 4 ZA single-vectors (svdot_za16_mf8_vg1x4_fpm) +;; - FDOT (4-way, multiple vectors) +;; - 2 ZA single-vectors (svdot_za32_mf8_vg1x2_fpm) +;; - 4 ZA single-vectors (svdot_za32_mf8_vg1x4_fpm) + +;; - FDOT (2-way, multiple and single vector, FP8 to FP16) +;; - 2 ZA single-vectors (svdot_single_za16_mf8_vg1x2_fpm) +;; - 4 ZA single-vectors (svdot_single_za16_mf8_vg1x4_fpm) +;; - FDOT (4-way, multiple and single vector) +;; - 2 ZA single-vectors (svdot_single_za32_mf8_vg1x2_fpm) +;; - 4 ZA single-vectors (svdot_single_za32_mf8_vg1x4_fpm) + +;; - FDOT (2-way, multiple and indexed vector, FP8 to FP16) +;; - 2 ZA single-vectors (svdot_lane_za16_mf8_vg1x2_fpm) +;; - 4 ZA single-vectors (svdot_lane_za16_mf8_vg1x4_fpm) +;; - FDOT (4-way, multiple and indexed vector) +;; - 2 ZA single-vectors (svdot_lane_za32_mf8_vg1x2_fpm) +;; - 4 ZA single-vectors (svdot_lane_za32_mf8_vg1x4_fpm) + +;; - FVDOT (FP8 to FP16) +;; - 2 ZA single-vectors (svvdot_lane_za16_mf8_vg1x2_fpm) + +;; - FVDOTB +;; - 2 ZA single-vectors (svvdotb_lane_za32_mf8_vg1x4_fpm) + +;; - FVDOTT +;; - 2 ZA single-vectors (svvdott_lane_za32_mf8_vg1x4_fpm) +;; ------------------------------------------------------------------------- + +;; FDOT (2-way, multiple vectors, FP8 to FP16) +;; Two ZA single-vectors (svdot_za16_mf8_vg1x2_fpm) +;; FDOT ZA.H[<Wv>, <offs>{, VGx2}], { <Zn1>.B-<Zn2>.B }, { <Zm1>.B-<Zm2>.B } +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> must be a multiple of 2 +;; <Zn2> must be a multiple of 2 + 1 +;; <Zm1> must be a multiple of 2 +;; <Zm2> must be a multiple of 2 + 1 +;; 
Four ZA single-vectors (svdot_za16_mf8_vg1x4_fpm) +;; FDOT ZA.H[<Wv>, <offs>{, VGx4}], { <Zn1>.B-<Zn4>.B }, { <Zm1>.B-<Zm4>.B } +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> must be a multiple of 4 +;; <Zn4> must be a multiple of 4 + 3 +;; <Zm1> must be a multiple of 4 +;; <Zm4> must be a multiple of 4 + 3 +;; FDOT (4-way, multiple vectors) +;; Two ZA single-vectors (svdot_za32_mf8_vg1x2_fpm) +;; FDOT ZA.S[<Wv>, <offs>{, VGx2}], { <Zn1>.B-<Zn2>.B }, { <Zm1>.B-<Zm2>.B } +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> must be a multiple of 2 +;; <Zn2> must be a multiple of 2 + 1 +;; <Zm1> must be a multiple of 2 +;; <Zm2> must be a multiple of 2 + 1 +;; Four ZA single-vectors (svdot_za32_mf8_vg1x4_fpm) +;; FDOT ZA.S[<Wv>, <offs>{, VGx4}], { <Zn1>.B-<Zn4>.B }, { <Zm1>.B-<Zm4>.B } +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> must be a multiple of 4 +;; <Zn4> must be a multiple of 4 + 3 +;; <Zm1> must be a multiple of 4 +;; <Zm4> must be a multiple of 4 + 3 +(define_insn "@aarch64_sme_<optab><SME_ZA_F8F16_32:mode><SME_ZA_FP8_x24:mode>" + [(set (reg:SME_ZA_F8F16_32 ZA_REGNUM) + (unspec:SME_ZA_F8F16_32 + [(reg:SME_ZA_F8F16_32 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_FP8_x24 1 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_FP8_x24 2 "aligned_register_operand" "Uw<vector_count>") + (reg:DI FPM_REGNUM)] + SME_FP8_DOTPROD))] + "TARGET_STREAMING" + "<optab>\tza.<SME_ZA_F8F16_32:Vetype>[%w0, 0, vgx<vector_count>], %1, %2" +) +(define_insn "@aarch64_sme_<optab><SME_ZA_F8F16_32:mode><SME_ZA_FP8_x24:mode>_plus" + [(set (reg:SME_ZA_F8F16_32 ZA_REGNUM) + (unspec:SME_ZA_F8F16_32 + [(reg:SME_ZA_F8F16_32 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_FP8_x24 2 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_FP8_x24 3 
"aligned_register_operand" "Uw<vector_count>") + (reg:DI FPM_REGNUM)] + SME_FP8_DOTPROD))] + "TARGET_STREAMING" + "<optab>\tza.<SME_ZA_F8F16_32:Vetype>[%w0, %1, vgx<vector_count>], %2, %3" +) + +;; FDOT (2-way, multiple and single vector, FP8 to FP16) +;; Two ZA single-vectors (svdot_single_za16_mf8_vg1x2_fpm) +;; FDOT ZA.H[<Wv>, <offs>{, VGx2}], { <Zn1>.B-<Zn2>.B }, <Zm>.B +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> can be any Z register +;; <Zn2> must be (<Zn1> + 1) mod 32 +;; <Zm> must be Z0-Z15 +;; Four ZA single-vectors (svdot_single_za16_mf8_vg1x4_fpm) +;; FDOT ZA.H[<Wv>, <offs>{, VGx4}], { <Zn1>.B-<Zn4>.B }, <Zm>.B +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> can be any Z register +;; <Zn4> must be (<Zn1> + 3) mod 32 +;; <Zm> must be Z0-Z15 +;; FDOT (4-way, multiple and single vector) +;; Two ZA single-vectors (svdot_single_za32_mf8_vg1x2_fpm) +;; FDOT ZA.S[<Wv>, <offs>{, VGx2}], { <Zn1>.B-<Zn2>.B }, <Zm>.B +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> can be any Z register +;; <Zn2> must be (<Zn1> + 1) mod 32 +;; <Zm> must be Z0-Z15 +;; Four ZA single-vectors (svdot_single_za32_mf8_vg1x4_fpm) +;; FDOT ZA.S[<Wv>, <offs>{, VGx4}], { <Zn1>.B-<Zn4>.B }, <Zm>.B +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> can be any Z register +;; <Zn4> must be (<Zn1> + 3) mod 32 +;; <Zm> must be Z0-Z15 +(define_insn "@aarch64_sme_single<optab><SME_ZA_F8F16_32:mode><SME_ZA_FP8_x24:mode>" + [(set (reg:SME_ZA_F8F16_32 ZA_REGNUM) + (unspec:SME_ZA_F8F16_32 + [(reg:SME_ZA_F8F16_32 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_FP8_x24 1 "register_operand" "w") + (match_operand:<SME_ZA_FP8_x24:VSINGLE> 2 "register_operand" "x") + (reg:DI FPM_REGNUM)] + SME_FP8_DOTPROD))] + "TARGET_STREAMING" + "<optab>\tza.<SME_ZA_F8F16_32:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.b" +) +(define_insn "@aarch64_sme_single<optab><SME_ZA_F8F16_32:mode><SME_ZA_FP8_x24:mode>_plus" + [(set 
(reg:SME_ZA_F8F16_32 ZA_REGNUM) + (unspec:SME_ZA_F8F16_32 + [(reg:SME_ZA_F8F16_32 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_FP8_x24 2 "register_operand" "w") + (match_operand:<SME_ZA_FP8_x24:VSINGLE> 3 "register_operand" "x") + (reg:DI FPM_REGNUM)] + SME_FP8_DOTPROD))] + "TARGET_STREAMING" + "<optab>\tza.<SME_ZA_F8F16_32:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.b" +) + +;; FDOT (2-way, multiple and indexed vector, FP8 to FP16) +;; Two ZA single-vectors (svdot_lane_za16_mf8_vg1x2_fpm) +;; FDOT ZA.H[<Wv>, <offs>{, VGx2}], { <Zn1>.B-<Zn2>.B }, <Zm>.B[<index>] +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> must be a multiple of 2 +;; <Zn2> must be a multiple of 2 + 1 +;; <Zm> must be Z0-Z15 +;; <index> must be 0-7 +;; Four ZA single-vectors (svdot_lane_za16_mf8_vg1x4_fpm) +;; FDOT ZA.H[<Wv>, <offs>{, VGx4}], { <Zn1>.B-<Zn4>.B }, <Zm>.B[<index>] +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> must be a multiple of 4 +;; <Zn4> must be a multiple of 4 + 3 +;; <Zm> must be Z0-Z15 +;; <index> must be 0-7 +;; Two ZA single-vectors (svdot_lane_za32_mf8_vg1x2_fpm) +;; FDOT ZA.S[<Wv>, <offs>{, VGx2}], { <Zn1>.B-<Zn2>.B }, <Zm>.B[<index>] +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> must be a multiple of 2 +;; <Zn2> must be a multiple of 2 + 1 +;; <Zm> must be Z0-Z15 +;; <index> must be 0-3 +;; Four ZA single-vectors (svdot_lane_za32_mf8_vg1x4_fpm) +;; FDOT ZA.S[<Wv>, <offs>{, VGx4}], { <Zn1>.B-<Zn4>.B }, <Zm>.B[<index>] +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> must be a multiple of 4 +;; <Zn4> must be a multiple of 4 + 3 +;; <Zm> must be Z0-Z15 +;; <index> must be 0-3 +(define_insn "@aarch64_sme_lane<optab><SME_ZA_F8F16_32:mode><SME_ZA_FP8_x24:mode>" + [(set (reg:SME_ZA_F8F16_32 ZA_REGNUM) + (unspec:SME_ZA_F8F16_32 + [(reg:SME_ZA_F8F16_32 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 
"register_operand" "Uci") + (match_operand:SME_ZA_FP8_x24 1 "aligned_register_operand" "Uw<vector_count>") + (match_operand:<SME_ZA_FP8_x24:VSINGLE> 2 "register_operand" "x") + (match_operand:SI 3 "const_int_operand") + (reg:DI FPM_REGNUM)] + SME_FP8_DOTPROD))] + "TARGET_STREAMING" + "<optab>\tza.<SME_ZA_F8F16_32:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.b[%3]" +) + +(define_insn "@aarch64_sme_lane<optab><SME_ZA_F8F16_32:mode><SME_ZA_FP8_x24:mode>_plus" + [(set (reg:SME_ZA_F8F16_32 ZA_REGNUM) + (unspec:SME_ZA_F8F16_32 + [(reg:SME_ZA_F8F16_32 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_FP8_x24 2 "aligned_register_operand" "Uw<vector_count>") + (match_operand:<SME_ZA_FP8_x24:VSINGLE> 3 "register_operand" "x") + (match_operand:SI 4 "const_int_operand") + (reg:DI FPM_REGNUM)] + SME_FP8_DOTPROD))] + "TARGET_STREAMING" + "<optab>\tza.<SME_ZA_F8F16_32:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.b[%4]" +) + +;; FVDOT (2-way, multiple and indexed vector, FP8 to FP16) +;; Two ZA single-vectors (svvdot_lane_za16_mf8_vg1x2_fpm) +;; FVDOT ZA.H[<Wv>, <offs>{, VGx2}], { <Zn1>.B-<Zn2>.B }, <Zm>.B[<index>] +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> must be a multiple of 2 +;; <Zn2> must be a multiple of 2 + 1 +;; <Zm> must be Z0-Z15 +;; <index> must be 0-7 +(define_insn "@aarch64_sme_lane<optab><SME_ZA_F8F16:mode><SME_ZA_FP8_x2:mode>" + [(set (reg:SME_ZA_F8F16 ZA_REGNUM) + (unspec:SME_ZA_F8F16 + [(reg:SME_ZA_F8F16 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_FP8_x2 1 "aligned_register_operand" "Uw<vector_count>") + (match_operand:<SME_ZA_FP8_x2:VSINGLE> 2 "register_operand" "x") + (match_operand:SI 3 "const_int_operand") + (reg:DI FPM_REGNUM)] + SME_FP8_FVDOT))] + "TARGET_STREAMING" + "<optab>\tza.h[%w0, 0, vgx<vector_count>], %1, %2.b[%3]" +) +(define_insn 
"@aarch64_sme_lane<optab><SME_ZA_F8F16:mode><SME_ZA_FP8_x2:mode>_plus" + [(set (reg:SME_ZA_F8F16 ZA_REGNUM) + (unspec:SME_ZA_F8F16 + [(reg:SME_ZA_F8F16 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:SME_ZA_FP8_x2 2 "aligned_register_operand" "Uw<vector_count>") + (match_operand:<SME_ZA_FP8_x2:VSINGLE> 3 "register_operand" "x") + (match_operand:SI 4 "const_int_operand") + (reg:DI FPM_REGNUM)] + SME_FP8_FVDOT))] + "TARGET_STREAMING" + "<optab>\tza.h[%w0, %1, vgx<vector_count>], %2, %3.b[%4]" +) + +;; FVDOTB (svvdotb_lane_za32_mf8_vg1x4_fpm) +;; FVDOTT (svvdott_lane_za32_mf8_vg1x4_fpm) +;; FVDOTB ZA.S[<Wv>, <offs>, VGx4], { <Zn1>.B-<Zn2>.B }, <Zm>.B[<index>] +;; FVDOTT ZA.S[<Wv>, <offs>, VGx4], { <Zn1>.B-<Zn2>.B }, <Zm>.B[<index>] +;; <Wv> must be W8-W11 +;; <offs> must be 0-7 +;; <Zn1> must be a multiple of 2 +;; <Zn2> must be a multiple of 2 + 1 +;; <Zm> must be Z0-Z15 +;; <index> must be 0-3 +(define_insn "@aarch64_fvdot_half<optab>" + [(set (reg:SME_ZA_F8F32 ZA_REGNUM) + (unspec:SME_ZA_F8F32 + [(reg:SME_ZA_F8F32 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:VNx32QI 1 "aligned_register_operand" "Uw2") + (match_operand:VNx16QI 2 "register_operand" "x") + (match_operand:DI 3 "const_int_operand") + (reg:DI FPM_REGNUM)] + SME_FP8_FVDOT_HALF))] + "TARGET_STREAMING" + "<optab>\tza.s[%w0, 0, vgx4], %1, %2.b[%3]" +) +(define_insn "@aarch64_fvdot_half<optab>_plus" + [(set (reg:SME_ZA_F8F32 ZA_REGNUM) + (unspec:SME_ZA_F8F32 + [(reg:SME_ZA_F8F32 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_0_to_7_operand")) + (match_operand:VNx32QI 2 "aligned_register_operand" "Uw2") + (match_operand:VNx16QI 3 "register_operand" "x") + (match_operand:DI 4 "const_int_operand") + (reg:DI FPM_REGNUM)] + SME_FP8_FVDOT_HALF))] + "TARGET_STREAMING" + 
"<optab>\tza.s[%w0, %1, vgx4], %2, %3.b[%4]" +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Ternary arithmetic on ZA slice ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h index f5cf6bfb899b..67f145b15f9d 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h @@ -483,6 +483,24 @@ public: } }; +class svvdot_half_impl : public read_write_za<unspec_based_function_base> +{ +public: + using parent = read_write_za<unspec_based_function_base>; + + CONSTEXPR svvdot_half_impl (int unspec_for_sint, int unspec_for_uint, + int unspec_for_fp, int unspec_for_mfp8) + : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, unspec_for_mfp8, + 1) + {} + + rtx expand (function_expander &e) const override + { + insn_code icode = code_for_aarch64_fvdot_half (unspec_for (e)); + return e.use_exact_insn (icode); + } +}; + using sme_2mode_function = sme_2mode_function_t<code_for_aarch64_sme, code_for_aarch64_sme_single>; diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index ea4be3733c25..648f94395186 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -2507,6 +2507,43 @@ struct dot_za_slice_lane_def : public binary_za_slice_lane_base<> }; SHAPE (dot_za_slice_lane) +/* void svvdott_lane_za32[_mf8]_vg1x4_fpm (uint32_t slice, svmfloat8x2_t zn, + svmfloat8_t zm, uint64_t imm_idx, + fpm_t fpm) __arm_streaming + __arm_inout ("za"); + void svvdotb_lane_za32[_mf8]_vg1x4_fpm (uint32_t slice, svmfloat8x2_t zn, + svmfloat8_t zm, uint64_t imm_idx, + fpm_t fpm) __arm_streaming + __arm_inout ("za"); */ +struct dot_half_za_slice_lane_def : public binary_za_slice_lane_base<> +{ + + constexpr 
dot_half_za_slice_lane_def () : binary_za_slice_lane_base<> (0) + {} + + void build (function_builder &b, + const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "_,su32,T1,v1,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const override + { + sve_type type; + if (!r.check_num_arguments (5) + || !r.require_scalar_type (0, "uint32_t") + || !(type = r.infer_vector_or_tuple_type (1, 2)) + || !r.require_vector_type (2, VECTOR_TYPE_svmfloat8_t) + || !r.require_integer_immediate (3)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (dot_half_za_slice_lane) + /* void svfoo_lane_t0[_t1]_g(uint32_t, sv<t1>x<g>_t, sv<t1:uint>_t, uint64_t) where the final argument indexes a <t0>-sized group of elements in the diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h index 421cb1e8b854..c215cf9d2545 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h @@ -126,6 +126,7 @@ namespace aarch64_sve extern const function_shape *const count_pred_c; extern const function_shape *const count_vector; extern const function_shape *const create; + extern const function_shape *const dot_half_za_slice_lane; extern const function_shape *const dot_za_slice_int_lane; extern const function_shape *const dot_za_slice_lane; extern const function_shape *const dot_za_slice_uint_lane; diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc index 20a6ebc40590..0e63c68a9cba 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc @@ -628,10 +628,12 @@ FUNCTION (svcntsd, svcnts_bhwd_impl, (VNx2DImode)) FUNCTION (svcntsh, svcnts_bhwd_impl, (VNx8HImode)) FUNCTION (svcntsw, svcnts_bhwd_impl, (VNx4SImode)) FUNCTION (svdot_za, sme_2mode_function, 
(UNSPEC_SME_SDOT, UNSPEC_SME_UDOT, - UNSPEC_SME_FDOT)) + UNSPEC_SME_FDOT, + UNSPEC_SME_FDOT_FP8)) FUNCTION (svdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SDOT, UNSPEC_SME_UDOT, - UNSPEC_SME_FDOT)) + UNSPEC_SME_FDOT, + UNSPEC_SME_FDOT_FP8)) FUNCTION (svld1_hor_za, svld1_za_impl, (UNSPEC_SME_LD1_HOR)) FUNCTION (svld1_ver_za, svld1_za_impl, (UNSPEC_SME_LD1_VER)) FUNCTION (svldr_za, svldr_za_impl, ) @@ -683,7 +685,12 @@ FUNCTION (svusmopa_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPA, -1)) FUNCTION (svusmops_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPS, -1)) FUNCTION (svvdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SVDOT, UNSPEC_SME_UVDOT, - UNSPEC_SME_FVDOT)) + UNSPEC_SME_FVDOT, + UNSPEC_SME_FVDOT_FP8)) +FUNCTION (svvdotb_lane_za, svvdot_half_impl, + (-1, -1, -1, UNSPEC_SME_FVDOTB_FP8)) +FUNCTION (svvdott_lane_za, svvdot_half_impl, + (-1, -1, -1, UNSPEC_SME_FVDOTT_FP8)) FUNCTION (svwrite_za, svwrite_za_impl,) FUNCTION (svwrite_hor_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_HOR)) FUNCTION (svwrite_ver_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_VER)) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def index 6306ee33a14e..b4d00de63b4f 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def @@ -218,6 +218,32 @@ DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_d_float, DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_d_float, vg1x24, none) #undef REQUIRED_EXTENSIONS +#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F8F16) +DEF_SME_ZA_FUNCTION_GS_FPM (svdot, binary_za_slice_opt_single, za_h_mf8, + vg1x24, none, set) +DEF_SME_ZA_FUNCTION_GS_FPM (svdot_lane, dot_za_slice_lane, za_h_mf8, + vg1x24, none, set) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F8F32) +DEF_SME_ZA_FUNCTION_GS_FPM (svdot, binary_za_slice_opt_single, za_s_mf8, + vg1x24, none, set) 
+DEF_SME_ZA_FUNCTION_GS_FPM (svdot_lane, dot_za_slice_lane, za_s_mf8, + vg1x24, none, set) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F8F16) +DEF_SME_ZA_FUNCTION_GS_FPM (svvdot_lane, dot_za_slice_lane, za_h_mf8, + vg1x2, none, set) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F8F32) +DEF_SME_ZA_FUNCTION_GS_FPM (svvdotb_lane, dot_half_za_slice_lane, za_s_mf8, + vg1x4, none, set) +DEF_SME_ZA_FUNCTION_GS_FPM (svvdott_lane, dot_half_za_slice_lane, za_s_mf8, + vg1x4, none, set) +#undef REQUIRED_EXTENSIONS + #define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F16F16) DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_h_float, vg1x24, none) DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_h_float, @@ -283,4 +309,5 @@ DEF_SME_ZA_FUNCTION_GS_FPM (svmopa, binary_za_m, za_s_mf8, none, za_m, set) #undef DEF_SME_ZA_FUNCTION #undef DEF_SME_ZA_FUNCTION_GS +#undef DEF_SME_ZA_FUNCTION_GS_FPM #undef DEF_SME_FUNCTION diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.h b/gcc/config/aarch64/aarch64-sve-builtins-sme.h index 4968f65442be..26761699127e 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.h @@ -38,6 +38,8 @@ namespace aarch64_sve extern const function_base *const svcntsw; extern const function_base *const svdot_za; extern const function_base *const svdot_lane_za; + extern const function_base *const svvdotb_lane_za; + extern const function_base *const svvdott_lane_za; extern const function_base *const svld1_hor_za; extern const function_base *const svld1_ver_za; extern const function_base *const svldr_za; diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 505a2445d1fc..bf025adde890 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -668,6 +668,14 @@ CONSTEXPR const group_suffix_info 
group_suffixes[] = { #define TYPES_za_h_mf8(S, D) \ D (za16, mf8) +/* _za32_mf8. */ +#define TYPES_za_s_mf8(S, D) \ + D (za32, mf8) + +/* { _za_16 _za_32 } x _mf8. */ +#define TYPES_za_hs_mf8(S, D) \ + D (za16, mf8), D (za32, mf8) + /* _za16_bf16. */ #define TYPES_za_h_bfloat(S, D) \ D (za16, bf16) @@ -868,6 +876,7 @@ DEF_SVE_TYPES_ARRAY (za_s_h_data); DEF_SVE_TYPES_ARRAY (za_s_unsigned); DEF_SVE_TYPES_ARRAY (za_s_integer); DEF_SVE_TYPES_ARRAY (za_s_mf8); +DEF_SVE_TYPES_ARRAY (za_hs_mf8); DEF_SVE_TYPES_ARRAY (za_s_float); DEF_SVE_TYPES_ARRAY (za_s_data); DEF_SVE_TYPES_ARRAY (za_d_h_integer); diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 56937739518e..6ce162aef1e1 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -758,12 +758,18 @@ (define_mode_iterator SME_ZA_HFx124 [VNx8BF VNx16BF VNx32BF VNx8HF VNx16HF VNx32HF]) +(define_mode_iterator SME_ZA_F8F16 [(VNx8HI "TARGET_STREAMING_SME_F8F16")]) +(define_mode_iterator SME_ZA_F8F32 [(VNx4SI "TARGET_STREAMING_SME_F8F32")]) + (define_mode_iterator SME_ZA_F8F16_32 [(VNx8HI "TARGET_STREAMING_SME_F8F16") (VNx4SI "TARGET_STREAMING_SME_F8F32")]) (define_mode_iterator SME_ZA_FP8_x24 [VNx32QI VNx64QI]) (define_mode_iterator SME_ZA_FP8_x124 [VNx16QI VNx32QI VNx64QI]) +(define_mode_iterator SME_ZA_FP8_x1 [VNx16QI]) +(define_mode_iterator SME_ZA_FP8_x2 [VNx32QI]) +(define_mode_iterator SME_ZA_FP8_x4 [VNx64QI]) (define_mode_iterator SME_ZA_HFx24 [VNx16BF VNx32BF VNx16HF VNx32HF]) @@ -1270,7 +1276,11 @@ UNSPEC_SME_BMOPS UNSPEC_SME_FADD UNSPEC_SME_FDOT + UNSPEC_SME_FDOT_FP8 UNSPEC_SME_FVDOT + UNSPEC_SME_FVDOT_FP8 + UNSPEC_SME_FVDOTT_FP8 + UNSPEC_SME_FVDOTB_FP8 UNSPEC_SME_FMLA UNSPEC_SME_FMLAL UNSPEC_SME_FMLS @@ -4066,6 +4076,12 @@ (define_int_iterator SME_FP_TERNARY_SLICE [UNSPEC_SME_FMLA UNSPEC_SME_FMLS]) (define_int_iterator SME_FP8_TERNARY_SLICE [UNSPEC_SME_FMLAL]) +(define_int_iterator SME_FP8_DOTPROD [UNSPEC_SME_FDOT_FP8]) +(define_int_iterator SME_FP8_FVDOT 
[UNSPEC_SME_FVDOT_FP8]) +(define_int_iterator SME_FP8_FVDOT_HALF [ + UNSPEC_SME_FVDOTB_FP8 + UNSPEC_SME_FVDOTT_FP8 +]) ;; Iterators for atomic operations. @@ -4214,7 +4230,11 @@ (UNSPEC_SME_BMOPS "bmops") (UNSPEC_SME_FADD "fadd") (UNSPEC_SME_FDOT "fdot") + (UNSPEC_SME_FDOT_FP8 "fdot") (UNSPEC_SME_FVDOT "fvdot") + (UNSPEC_SME_FVDOT_FP8 "fvdot") + (UNSPEC_SME_FVDOTB_FP8 "fvdotb") + (UNSPEC_SME_FVDOTT_FP8 "fvdott") (UNSPEC_SME_FMLA "fmla") (UNSPEC_SME_FMLAL "fmlal") (UNSPEC_SME_FMLS "fmls") diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za16_mf8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za16_mf8_vg1x2.c new file mode 100644 index 000000000000..462834e41987 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za16_mf8_vg1x2.c @@ -0,0 +1,119 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f16_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f16_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f16" + +/* Available variants are: + _za16 if __ARM_FEATURE_SME_F8F16 != 0 + void svdot_lane_za16[_mf8]_vg1x2_fpm (uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, uint64_t imm_idx, fpm_t fpm) __arm_streaming __arm_inout("za"); */ + +/* +** dot_lane_0_z0_z4_0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fdot za\.h\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x2_fpm (0, z0, z4, 0, fpm0), + svdot_lane_za16_vg1x2_fpm (0, z0, z4, 0, fpm0)) + +/* +** dot_lane_w0_z0_z7_1: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fdot za\.h\[\1, 0, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x2_fpm (w0, z0, z7, 1, fpm0), + svdot_lane_za16_vg1x2_fpm (w0, z0, z7, 1, fpm0)) + +/* +** dot_lane_w8_z28_z4_2: +** msr fpmr, 
x1 +** fdot za\.h\[w8, 0, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x2_fpm (w8, z28, z4, 2, fpm0), + svdot_lane_za16_vg1x2_fpm (w8, z28, z4, 2, fpm0)) + +/* +** dot_lane_w8p7_z0_z4_3: +** msr fpmr, x1 +** fdot za\.h\[w8, 7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x2_fpm (w8 + 7, z0, z4, 3, fpm0), + svdot_lane_za16_vg1x2_fpm (w8 + 7, z0, z4, 3, fpm0)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fdot za\.h\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x2_fpm (w8 + 8, z0, z4, 0, fpm0), + svdot_lane_za16_vg1x2_fpm (w8 + 8, z0, z4, 0, fpm0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fdot za\.h\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x2_fpm (w0 - 1, z0, z4, 1, fpm0), + svdot_lane_za16_vg1x2_fpm (w0 - 1, z0, z4, 1, fpm0)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! 
+** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx2\], {z4\.b - z5\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x2_fpm (w8, z4, z15, 2, fpm0), + svdot_lane_za16_vg1x2_fpm (w8, z4, z15, 2, fpm0)) + +/* +** dot_lane_w8_z28_z16_3: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fdot za\.h\[w8, 0, vgx2\], {z28\.b - z29\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x2_fpm (w8, z28, z16, 3, fpm0), + svdot_lane_za16_vg1x2_fpm (w8, z28, z16, 3, fpm0)) + +/* +** dot_lane_w8_z17_z7_0: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** fdot za\.h\[w8, 0, vgx2\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x2_fpm (w8, z17, z7, 0, fpm0), + svdot_lane_za16_vg1x2_fpm (w8, z17, z7, 0, fpm0)) + +/* +** dot_lane_w8_z22_z4_1: +** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx2\], {z22\.b - z23\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x2_fpm (w8, z22, z4, 1, fpm0), + svdot_lane_za16_vg1x2_fpm (w8, z22, z4, 1, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za16_mf8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za16_mf8_vg1x4.c new file mode 100644 index 000000000000..b084a8007fcf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za16_mf8_vg1x4.c @@ -0,0 +1,125 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f16_ok } } } */ +/* { dg-do compile { target { ! 
{ aarch64_asm_sme-f8f16_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f16" + +/* Available variants are: + _za16 if __ARM_FEATURE_SME_F8F16 != 0 + void svdot_lane_za16[_mf8]_vg1x4_fpm (uint32_t slice, svmfloat8x4_t zn, svmfloat8_t zm, uint64_t imm_idx, fpm_t fpm) __arm_streaming __arm_inout("za"); */ + +/* +** dot_lane_0_z0_z4_0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fdot za\.h\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x4_fpm (0, z0, z4, 0, fpm0), + svdot_lane_za16_vg1x4_fpm (0, z0, z4, 0, fpm0)) + +/* +** dot_lane_w0_z0_z7_1: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fdot za\.h\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x4_fpm (w0, z0, z7, 1, fpm0), + svdot_lane_za16_vg1x4_fpm (w0, z0, z7, 1, fpm0)) + +/* +** dot_lane_w8_z28_z4_2: +** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x4_fpm (w8, z28, z4, 2, fpm0), + svdot_lane_za16_vg1x4_fpm (w8, z28, z4, 2, fpm0)) + +/* +** dot_lane_w8p7_z0_z4_3: +** msr fpmr, x1 +** fdot za\.h\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x4_fpm (w8 + 7, z0, z4, 3, fpm0), + svdot_lane_za16_vg1x4_fpm (w8 + 7, z0, z4, 3, fpm0)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fdot za\.h\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x4_fpm (w8 + 8, z0, z4, 0, fpm0), + svdot_lane_za16_vg1x4_fpm (w8 + 8, z0, z4, 0, fpm0)) + +/* +** dot_lane_w0m1_z0_z4_1: 
+** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fdot za\.h\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x4_fpm (w0 - 1, z0, z4, 1, fpm0), + svdot_lane_za16_vg1x4_fpm (w0 - 1, z0, z4, 1, fpm0)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! +** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x4_fpm (w8, z4, z15, 2, fpm0), + svdot_lane_za16_vg1x4_fpm (w8, z4, z15, 2, fpm0)) + +/* +** dot_lane_w8_z28_z16_3: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fdot za\.h\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x4_fpm (w8, z28, z16, 3, fpm0), + svdot_lane_za16_vg1x4_fpm (w8, z28, z16, 3, fpm0)) + +/* +** dot_lane_w8_z17_z7_0: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fdot za\.h\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x4_fpm (w8, z17, z7, 0, fpm0), + svdot_lane_za16_vg1x4_fpm (w8, z17, z7, 0, fpm0)) + +/* +** dot_lane_w8_z22_z4_1: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fdot za\.h\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za16_mf8_vg1x4_fpm (w8, z22, z4, 1, fpm0), + svdot_lane_za16_vg1x4_fpm (w8, z22, z4, 1, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_mf8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_mf8_vg1x2.c new file mode 100644 index 000000000000..36e53a3d1fb9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_mf8_vg1x2.c @@ -0,0 +1,119 
@@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f32" + +/* Available variants are: + _za32 if __ARM_FEATURE_SME_F8F32 != 0 + void svdot_lane_za32[_mf8]_vg1x2_fpm (uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, uint64_t imm_idx, fpm_t fpm) __arm_streaming __arm_inout("za"); */ + +/* +** dot_lane_0_z0_z4_0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x2_fpm (0, z0, z4, 0, fpm0), + svdot_lane_za32_vg1x2_fpm (0, z0, z4, 0, fpm0)) + +/* +** dot_lane_w0_z0_z7_1: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x2_fpm (w0, z0, z7, 1, fpm0), + svdot_lane_za32_vg1x2_fpm (w0, z0, z7, 1, fpm0)) + +/* +** dot_lane_w8_z28_z4_2: +** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x2_fpm (w8, z28, z4, 2, fpm0), + svdot_lane_za32_vg1x2_fpm (w8, z28, z4, 2, fpm0)) + +/* +** dot_lane_w8p7_z0_z4_3: +** msr fpmr, x1 +** fdot za\.s\[w8, 7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x2_fpm (w8 + 7, z0, z4, 3, fpm0), + svdot_lane_za32_vg1x2_fpm (w8 + 7, z0, z4, 3, fpm0)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x2_fpm (w8 + 8, z0, 
z4, 0, fpm0), + svdot_lane_za32_vg1x2_fpm (w8 + 8, z0, z4, 0, fpm0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x2_fpm (w0 - 1, z0, z4, 1, fpm0), + svdot_lane_za32_vg1x2_fpm (w0 - 1, z0, z4, 1, fpm0)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! +** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x2_fpm (w8, z4, z15, 2, fpm0), + svdot_lane_za32_vg1x2_fpm (w8, z4, z15, 2, fpm0)) + +/* +** dot_lane_w8_z28_z16_3: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x2_fpm (w8, z28, z16, 3, fpm0), + svdot_lane_za32_vg1x2_fpm (w8, z28, z16, 3, fpm0)) + +/* +** dot_lane_w8_z17_z7_0: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** fdot za\.s\[w8, 0, vgx2\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x2_fpm (w8, z17, z7, 0, fpm0), + svdot_lane_za32_vg1x2_fpm (w8, z17, z7, 0, fpm0)) + +/* +** dot_lane_w8_z22_z4_1: +** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx2\], {z22\.b - z23\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svmfloat8x2_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x2_fpm (w8, z22, z4, 1, fpm0), + svdot_lane_za32_vg1x2_fpm (w8, z22, z4, 1, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_mf8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_mf8_vg1x4.c new file mode 100644 index 000000000000..97c0070b06b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_lane_za32_mf8_vg1x4.c @@ 
-0,0 +1,125 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f32" + +/* Available variants are: + _za32 if __ARM_FEATURE_SME_F8F32 != 0 + void svdot_lane_za32[_mf8]_vg1x4_fpm (uint32_t slice, svmfloat8x4_t zn, svmfloat8_t zm, uint64_t imm_idx, fpm_t fpm) __arm_streaming __arm_inout("za"); */ + +/* +** dot_lane_0_z0_z4_0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x4_fpm (0, z0, z4, 0, fpm0), + svdot_lane_za32_vg1x4_fpm (0, z0, z4, 0, fpm0)) + +/* +** dot_lane_w0_z0_z7_1: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x4_fpm (w0, z0, z7, 1, fpm0), + svdot_lane_za32_vg1x4_fpm (w0, z0, z7, 1, fpm0)) + +/* +** dot_lane_w8_z28_z4_2: +** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x4_fpm (w8, z28, z4, 2, fpm0), + svdot_lane_za32_vg1x4_fpm (w8, z28, z4, 2, fpm0)) + +/* +** dot_lane_w8p7_z0_z4_3: +** msr fpmr, x1 +** fdot za\.s\[w8, 7, vgx4\], {z0\.b - z3\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x4_fpm (w8 + 7, z0, z4, 3, fpm0), + svdot_lane_za32_vg1x4_fpm (w8 + 7, z0, z4, 3, fpm0)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x4_fpm (w8 
+ 8, z0, z4, 0, fpm0), + svdot_lane_za32_vg1x4_fpm (w8 + 8, z0, z4, 0, fpm0)) + +/* +** dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x4_fpm (w0 - 1, z0, z4, 1, fpm0), + svdot_lane_za32_vg1x4_fpm (w0 - 1, z0, z4, 1, fpm0)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! +** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x4_fpm (w8, z4, z15, 2, fpm0), + svdot_lane_za32_vg1x4_fpm (w8, z4, z15, 2, fpm0)) + +/* +** dot_lane_w8_z28_z16_3: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x4_fpm (w8, z28, z16, 3, fpm0), + svdot_lane_za32_vg1x4_fpm (w8, z28, z16, 3, fpm0)) + +/* +** dot_lane_w8_z17_z7_0: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fdot za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x4_fpm (w8, z17, z7, 0, fpm0), + svdot_lane_za32_vg1x4_fpm (w8, z17, z7, 0, fpm0)) + +/* +** dot_lane_w8_z22_z4_1: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svmfloat8x4_t, svmfloat8_t, + svdot_lane_za32_mf8_vg1x4_fpm (w8, z22, z4, 1, fpm0), + svdot_lane_za32_vg1x4_fpm (w8, z22, z4, 1, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_single_za16_mf8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_single_za16_mf8_vg1x2.c new file mode 100644 index 
000000000000..f7270551dcb0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_single_za16_mf8_vg1x2.c @@ -0,0 +1,126 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f16_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f16_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f16" + +/* Available variants are: + _za16 if __ARM_FEATURE_SME_F8F16 != 0 + void svdot[_single]_za16[_mf8]_vg1x2_fpm (uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpm) __arm_streaming __arm_inout("za"); */ +/* +** dot_single_0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fdot za\.h\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x2_fpm (0, z1, z0, fpm0), + svdot_za16_vg1x2_fpm (0, z1, z0, fpm0)) + +/* +** dot_single_w0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fdot za\.h\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x2_fpm (w0, z1, z0, fpm0), + svdot_za16_vg1x2_fpm (w0, z1, z0, fpm0)) + +/* +** dot_single_w8_z1_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x2_fpm (w8, z1, z0, fpm0), + svdot_za16_vg1x2_fpm (w8, z1, z0, fpm0)) + +/* +** dot_single_w8p1_z1_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 1, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x2_fpm (w8 + 1, z1, z0, fpm0), + svdot_za16_vg1x2_fpm (w8 + 1, z1, z0, fpm0)) + +/* +** dot_single_w8p2_z20_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 2, vgx2\], {z20\.b - z21\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svmfloat8x2_t, svmfloat8_t, + 
svdot_single_za16_mf8_vg1x2_fpm (w8 + 2, z20, z0, fpm0), + svdot_za16_vg1x2_fpm (w8 + 2, z20, z0, fpm0)) + +/* +** dot_single_w11p4_z27_z0: +** msr fpmr, x1 +** fdot za\.h\[w11, 4, vgx2\], {z27\.b - z28\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w11p4_z27_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x2_fpm (w11 + 4, z27, z0, fpm0), + svdot_za16_vg1x2_fpm (w11 + 4, z27, z0, fpm0)) + +/* +** dot_single_w8p7_z1_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 7, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x2_fpm (w8 + 7, z1, z0, fpm0), + svdot_za16_vg1x2_fpm (w8 + 7, z1, z0, fpm0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fdot za\.h\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x2_fpm (w8 + 8, z1, z0, fpm0), + svdot_za16_vg1x2_fpm (w8 + 8, z1, z0, fpm0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fdot za\.h\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x2_fpm (w0 - 1, z1, z0, fpm0), + svdot_za16_vg1x2_fpm (w0 - 1, z1, z0, fpm0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx2\], {z0\.b - z1\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svmfloat8x2_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x2_fpm (w8, z0, z15, fpm0), + svdot_za16_vg1x2_fpm (w8, z0, z15, fpm0)) + +/* +** dot_single_w8_z20_z16: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fdot za\.h\[w8, 0, vgx2\], {z20\.b - z21\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svmfloat8x2_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x2_fpm (w8, z20, z16, fpm0), + svdot_za16_vg1x2_fpm (w8, z20, z16, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_single_za16_mf8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_single_za16_mf8_vg1x4.c new file mode 100644 index 000000000000..1976c5c686dd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_single_za16_mf8_vg1x4.c @@ -0,0 +1,126 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f16_ok } } } */ +/* { dg-do compile { target { ! 
{ aarch64_asm_sme-f8f16_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f16" + +/* Available variants are: + _za16 if __ARM_FEATURE_SME_F8F16 != 0 + void svdot[_single]_za16[_mf8]_vg1x4_fpm (uint32_t slice, svmfloat8x4_t zn, svmfloat8_t zm, fpm_t fpm) __arm_streaming __arm_inout("za"); */ +/* +** dot_single_0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fdot za\.h\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x4_fpm (0, z1, z0, fpm0), + svdot_za16_vg1x4_fpm (0, z1, z0, fpm0)) + +/* +** dot_single_w0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fdot za\.h\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x4_fpm (w0, z1, z0, fpm0), + svdot_za16_vg1x4_fpm (w0, z1, z0, fpm0)) + +/* +** dot_single_w8_z1_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x4_fpm (w8, z1, z0, fpm0), + svdot_za16_vg1x4_fpm (w8, z1, z0, fpm0)) + +/* +** dot_single_w8p1_z1_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 1, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x4_fpm (w8 + 1, z1, z0, fpm0), + svdot_za16_vg1x4_fpm (w8 + 1, z1, z0, fpm0)) + +/* +** dot_single_w8p2_z20_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 2, vgx4\], {z20\.b - z23\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x4_fpm (w8 + 2, z20, z0, fpm0), + svdot_za16_vg1x4_fpm (w8 + 2, z20, z0, fpm0)) + +/* +** dot_single_w11p4_z27_z0: +** msr fpmr, x1 +** fdot za\.h\[w11, 4, vgx4\], {z27\.b - z30\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE 
(dot_single_w11p4_z27_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x4_fpm (w11 + 4, z27, z0, fpm0), + svdot_za16_vg1x4_fpm (w11 + 4, z27, z0, fpm0)) + +/* +** dot_single_w8p7_z1_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 7, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x4_fpm (w8 + 7, z1, z0, fpm0), + svdot_za16_vg1x4_fpm (w8 + 7, z1, z0, fpm0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fdot za\.h\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x4_fpm (w8 + 8, z1, z0, fpm0), + svdot_za16_vg1x4_fpm (w8 + 8, z1, z0, fpm0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fdot za\.h\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x4_fpm (w0 - 1, z1, z0, fpm0), + svdot_za16_vg1x4_fpm (w0 - 1, z1, z0, fpm0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx4\], {z0\.b - z3\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svmfloat8x4_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x4_fpm (w8, z0, z15, fpm0), + svdot_za16_vg1x4_fpm (w8, z0, z15, fpm0)) + +/* +** dot_single_w8_z20_z16: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fdot za\.h\[w8, 0, vgx4\], {z20\.b - z23\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svmfloat8x4_t, svmfloat8_t, + svdot_single_za16_mf8_vg1x4_fpm (w8, z20, z16, fpm0), + svdot_za16_vg1x4_fpm (w8, z20, z16, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_single_za32_mf8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_single_za32_mf8_vg1x2.c new file mode 100644 index 000000000000..d60c45805300 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_single_za32_mf8_vg1x2.c @@ -0,0 +1,126 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! 
{ aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f32" + +/* Available variants are: + _za32 if __ARM_FEATURE_SME_F8F32 != 0 + void svdot[_single]_za32[_mf8]_vg1x2_fpm (uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpm) __arm_streaming __arm_inout("za"); */ +/* +** dot_single_0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x2_fpm (0, z1, z0, fpm0), + svdot_za32_vg1x2_fpm (0, z1, z0, fpm0)) + +/* +** dot_single_w0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x2_fpm (w0, z1, z0, fpm0), + svdot_za32_vg1x2_fpm (w0, z1, z0, fpm0)) + +/* +** dot_single_w8_z1_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x2_fpm (w8, z1, z0, fpm0), + svdot_za32_vg1x2_fpm (w8, z1, z0, fpm0)) + +/* +** dot_single_w8p1_z1_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 1, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x2_fpm (w8 + 1, z1, z0, fpm0), + svdot_za32_vg1x2_fpm (w8 + 1, z1, z0, fpm0)) + +/* +** dot_single_w8p2_z20_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 2, vgx2\], {z20\.b - z21\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x2_fpm (w8 + 2, z20, z0, fpm0), + svdot_za32_vg1x2_fpm (w8 + 2, z20, z0, fpm0)) + +/* +** dot_single_w11p4_z27_z0: +** msr fpmr, x1 +** fdot za\.s\[w11, 4, vgx2\], {z27\.b - z28\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE 
(dot_single_w11p4_z27_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x2_fpm (w11 + 4, z27, z0, fpm0), + svdot_za32_vg1x2_fpm (w11 + 4, z27, z0, fpm0)) + +/* +** dot_single_w8p7_z1_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 7, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x2_fpm (w8 + 7, z1, z0, fpm0), + svdot_za32_vg1x2_fpm (w8 + 7, z1, z0, fpm0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x2_fpm (w8 + 8, z1, z0, fpm0), + svdot_za32_vg1x2_fpm (w8 + 8, z1, z0, fpm0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fdot za\.s\[\1, 0, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svmfloat8x2_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x2_fpm (w0 - 1, z1, z0, fpm0), + svdot_za32_vg1x2_fpm (w0 - 1, z1, z0, fpm0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svmfloat8x2_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x2_fpm (w8, z0, z15, fpm0), + svdot_za32_vg1x2_fpm (w8, z0, z15, fpm0)) + +/* +** dot_single_w8_z20_z16: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fdot za\.s\[w8, 0, vgx2\], {z20\.b - z21\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svmfloat8x2_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x2_fpm (w8, z20, z16, fpm0), + svdot_za32_vg1x2_fpm (w8, z20, z16, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_single_za32_mf8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_single_za32_mf8_vg1x4.c new file mode 100644 index 000000000000..ab1c1087f82c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_single_za32_mf8_vg1x4.c @@ -0,0 +1,126 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! 
{ aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f32" + +/* Available variants are: + _za32 if __ARM_FEATURE_SME_F8F32 != 0 + void svdot[_single]_za32[_mf8]_vg1x4_fpm (uint32_t slice, svmfloat8x4_t zn, svmfloat8_t zm, fpm_t fpm) __arm_streaming __arm_inout("za"); */ +/* +** dot_single_0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_0_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x4_fpm (0, z1, z0, fpm0), + svdot_za32_vg1x4_fpm (0, z1, z0, fpm0)) + +/* +** dot_single_w0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x4_fpm (w0, z1, z0, fpm0), + svdot_za32_vg1x4_fpm (w0, z1, z0, fpm0)) + +/* +** dot_single_w8_z1_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x4_fpm (w8, z1, z0, fpm0), + svdot_za32_vg1x4_fpm (w8, z1, z0, fpm0)) + +/* +** dot_single_w8p1_z1_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 1, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p1_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x4_fpm (w8 + 1, z1, z0, fpm0), + svdot_za32_vg1x4_fpm (w8 + 1, z1, z0, fpm0)) + +/* +** dot_single_w8p2_z20_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 2, vgx4\], {z20\.b - z23\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p2_z20_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x4_fpm (w8 + 2, z20, z0, fpm0), + svdot_za32_vg1x4_fpm (w8 + 2, z20, z0, fpm0)) + +/* +** dot_single_w11p4_z27_z0: +** msr fpmr, x1 +** fdot za\.s\[w11, 4, vgx4\], {z27\.b - z30\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE 
(dot_single_w11p4_z27_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x4_fpm (w11 + 4, z27, z0, fpm0), + svdot_za32_vg1x4_fpm (w11 + 4, z27, z0, fpm0)) + +/* +** dot_single_w8p7_z1_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 7, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p7_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x4_fpm (w8 + 7, z1, z0, fpm0), + svdot_za32_vg1x4_fpm (w8 + 7, z1, z0, fpm0)) + +/* +** dot_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8p8_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x4_fpm (w8 + 8, z1, z0, fpm0), + svdot_za32_vg1x4_fpm (w8 + 8, z1, z0, fpm0)) + +/* +** dot_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fdot za\.s\[\1, 0, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w0m1_z1_z0, svmfloat8x4_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x4_fpm (w0 - 1, z1, z0, fpm0), + svdot_za32_vg1x4_fpm (w0 - 1, z1, z0, fpm0)) + +/* +** dot_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (dot_single_w8_z0_z15, svmfloat8x4_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x4_fpm (w8, z0, z15, fpm0), + svdot_za32_vg1x4_fpm (w8, z0, z15, fpm0)) + +/* +** dot_single_w8_z20_z16: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fdot za\.s\[w8, 0, vgx4\], {z20\.b - z23\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (dot_single_w8_z20_z16, svmfloat8x4_t, svmfloat8_t, + svdot_single_za32_mf8_vg1x4_fpm (w8, z20, z16, fpm0), + svdot_za32_vg1x4_fpm (w8, z20, z16, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za16_mf8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za16_mf8_vg1x2.c new file mode 100644 index 000000000000..117d352061f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za16_mf8_vg1x2.c @@ -0,0 +1,150 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f16_ok } } } */ +/* { dg-do compile { target { ! 
{ aarch64_asm_sme-f8f16_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f16" + +/* Available variants are + _za16 if __ARM_FEATURE_SME_F8F16 != 0 + void svdot_za16[_mf8]_vg1x2_fpm (uint32_t slice, svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpm) __arm_streaming __arm_inout("za"); */ + +/* +** dot_0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fdot za\.h\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svmfloat8x2_t, svdot_za16_mf8_vg1x2_fpm (0, z0, z0, fpm0), + svdot_za16_vg1x2_fpm (0, z0, z0, fpm0)) + +/* +** dot_w0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fdot za\.h\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svmfloat8x2_t, svdot_za16_mf8_vg1x2_fpm (w0, z0, z0, fpm0), + svdot_za16_vg1x2_fpm (w0, z0, z0, fpm0)) + +/* +** dot_w8_z0_z4: +** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svmfloat8x2_t, svdot_za16_mf8_vg1x2_fpm (w8, z0, z4, fpm0), + svdot_za16_vg1x2_fpm (w8, z0, z4, fpm0)) + +/* +** dot_w8_z4_z18: +** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svmfloat8x2_t, svdot_za16_mf8_vg1x2_fpm (w8, z4, z18, fpm0), + svdot_za16_vg1x2_fpm (w8, z4, z18, fpm0)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** msr fpmr, x1 +** ... +** fdot za\.h\[w8, 0, vgx2\], {z0\.b - z1\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svmfloat8x2_t, svdot_za16_mf8_vg1x2_fpm (w8, z0, z23, fpm0), + svdot_za16_vg1x2_fpm (w8, z0, z23, fpm0)) + +/* +** dot_w8_z23_z0: +** msr fpmr, x1 +** ... 
+** fdot za\.h\[w8, 0, vgx2\], [^\n]+, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svmfloat8x2_t, svdot_za16_mf8_vg1x2_fpm (w8, z23, z0, fpm0), + svdot_za16_vg1x2_fpm (w8, z23, z0, fpm0)) + +/* +** dot_w8_z18_z28: +** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svmfloat8x2_t, svdot_za16_mf8_vg1x2_fpm (w8, z18, z28, fpm0), + svdot_za16_vg1x2_fpm (w8, z18, z28, fpm0)) + +/* +** dot_w8_z28_z4: +** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svmfloat8x2_t, svdot_za16_mf8_vg1x2_fpm (w8, z28, z4, fpm0), + svdot_za16_vg1x2_fpm (w8, z28, z4, fpm0)) + +/* +** dot_w8p1_z4_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 1, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svmfloat8x2_t, + svdot_za16_mf8_vg1x2_fpm (w8 + 1, z4, z0, fpm0), + svdot_za16_vg1x2_fpm (w8 + 1, z4, z0, fpm0)) + +/* +** dot_w8p2_z4_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 2, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svmfloat8x2_t, + svdot_za16_mf8_vg1x2_fpm (w8 + 2, z4, z0, fpm0), + svdot_za16_vg1x2_fpm (w8 + 2, z4, z0, fpm0)) + +/* +** dot_w11p4_z4_z0: +** msr fpmr, x1 +** fdot za\.h\[w11, 4, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svmfloat8x2_t, + svdot_za16_mf8_vg1x2_fpm (w11 + 4, z4, z0, fpm0), + svdot_za16_vg1x2_fpm (w11 + 4, z4, z0, fpm0)) + +/* +** dot_w8p7_z4_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svmfloat8x2_t, + svdot_za16_mf8_vg1x2_fpm (w8 + 7, z4, z0, fpm0), + svdot_za16_vg1x2_fpm (w8 + 7, z4, z0, fpm0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fdot za\.h\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svmfloat8x2_t, + svdot_za16_mf8_vg1x2_fpm (w8 + 8, z4, 
z4, fpm0), + svdot_za16_vg1x2_fpm (w8 + 8, z4, z4, fpm0)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fdot za\.h\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svmfloat8x2_t, + svdot_za16_mf8_vg1x2_fpm (w8 - 1, z4, z0, fpm0), + svdot_za16_vg1x2_fpm (w8 - 1, z4, z0, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za16_mf8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za16_mf8_vg1x4.c new file mode 100644 index 000000000000..d28bc48db0f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za16_mf8_vg1x4.c @@ -0,0 +1,166 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f16_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f16_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f16" + +/* Available variants are + _za16 if __ARM_FEATURE_SME_F8F16 != 0 + void svdot_za16[_mf8]_vg1x4_fpm (uint32_t slice, svmfloat8x4_t zn, svmfloat8x4_t zm, fpm_t fpm) __arm_streaming __arm_inout("za"); */ + +/* +** dot_0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fdot za\.h\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (0, z0, z0, fpm0), + svdot_za16_vg1x4_fpm (0, z0, z0, fpm0)) + +/* +** dot_w0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fdot za\.h\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (w0, z0, z0, fpm0), + svdot_za16_vg1x4_fpm (w0, z0, z0, fpm0)) + +/* +** dot_w8_z0_z4: +** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (w8, z0, z4, fpm0), + svdot_za16_vg1x4_fpm (w8, z0, z4, fpm0)) + +/* +** dot_w8_z4_z18: +** msr fpmr, x1 +** mov [^\n]+ 
+** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fdot za\.h\[w8, 0, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (w8, z4, z18, fpm0), + svdot_za16_vg1x4_fpm (w8, z4, z18, fpm0)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** msr fpmr, x1 +** ... +** fdot za\.h\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (w8, z0, z23, fpm0), + svdot_za16_vg1x4_fpm (w8, z0, z23, fpm0)) + +/* +** dot_w8_z23_z0: +** msr fpmr, x1 +** ... +** fdot za\.h\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (w8, z23, z0, fpm0), + svdot_za16_vg1x4_fpm (w8, z23, z0, fpm0)) + +/* +** dot_w8_z18_z28: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fdot za\.h\[w8, 0, vgx4\], {z24\.b - z27\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (w8, z18, z28, fpm0), + svdot_za16_vg1x4_fpm (w8, z18, z28, fpm0)) + +/* +** dot_w8_z28_z4: +** msr fpmr, x1 +** fdot za\.h\[w8, 0, vgx4\], {z28\.b - z31\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (w8, z28, z4, fpm0), + svdot_za16_vg1x4_fpm (w8, z28, z4, fpm0)) + +/* +** dot_w8p1_z4_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 1, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (w8 + 1, z4, z0, fpm0), + svdot_za16_vg1x4_fpm (w8 + 1, z4, z0, fpm0)) + +/* +** dot_w8p2_z4_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 2, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (w8 + 2, z4, z0, fpm0), + svdot_za16_vg1x4_fpm (w8 + 2, z4, z0, fpm0)) + +/* +** dot_w11p4_z4_z0: +** msr fpmr, x1 +** fdot 
za\.h\[w11, 4, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (w11 + 4, z4, z0, fpm0), + svdot_za16_vg1x4_fpm (w11 + 4, z4, z0, fpm0)) + +/* +** dot_w8p7_z4_z0: +** msr fpmr, x1 +** fdot za\.h\[w8, 7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (w8 + 7, z4, z0, fpm0), + svdot_za16_vg1x4_fpm (w8 + 7, z4, z0, fpm0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fdot za\.h\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (w8 + 8, z4, z4, fpm0), + svdot_za16_vg1x4_fpm (w8 + 8, z4, z4, fpm0)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fdot za\.h\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svmfloat8x4_t, + svdot_za16_mf8_vg1x4_fpm (w8 - 1, z4, z0, fpm0), + svdot_za16_vg1x4_fpm (w8 - 1, z4, z0, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_mf8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_mf8_vg1x2.c new file mode 100644 index 000000000000..225e18ac05df --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_mf8_vg1x2.c @@ -0,0 +1,150 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! 
{ aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f32" + +/* Available variants are + _za32 if __ARM_FEATURE_SME_F8F32 != 0 + void svdot_za32[_mf8]_vg1x2_fpm (uint32_t slice, svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpm) __arm_streaming __arm_inout("za"); */ + +/* +** dot_0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svmfloat8x2_t, svdot_za32_mf8_vg1x2_fpm (0, z0, z0, fpm0), + svdot_za32_vg1x2_fpm (0, z0, z0, fpm0)) + +/* +** dot_w0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fdot za\.s\[\1, 0, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svmfloat8x2_t, svdot_za32_mf8_vg1x2_fpm (w0, z0, z0, fpm0), + svdot_za32_vg1x2_fpm (w0, z0, z0, fpm0)) + +/* +** dot_w8_z0_z4: +** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svmfloat8x2_t, svdot_za32_mf8_vg1x2_fpm (w8, z0, z4, fpm0), + svdot_za32_vg1x2_fpm (w8, z0, z4, fpm0)) + +/* +** dot_w8_z4_z18: +** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svmfloat8x2_t, svdot_za32_mf8_vg1x2_fpm (w8, z4, z18, fpm0), + svdot_za32_vg1x2_fpm (w8, z4, z18, fpm0)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** msr fpmr, x1 +** ... +** fdot za\.s\[w8, 0, vgx2\], {z0\.b - z1\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svmfloat8x2_t, svdot_za32_mf8_vg1x2_fpm (w8, z0, z23, fpm0), + svdot_za32_vg1x2_fpm (w8, z0, z23, fpm0)) + +/* +** dot_w8_z23_z0: +** msr fpmr, x1 +** ... 
+** fdot za\.s\[w8, 0, vgx2\], [^\n]+, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svmfloat8x2_t, svdot_za32_mf8_vg1x2_fpm (w8, z23, z0, fpm0), + svdot_za32_vg1x2_fpm (w8, z23, z0, fpm0)) + +/* +** dot_w8_z18_z28: +** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svmfloat8x2_t, svdot_za32_mf8_vg1x2_fpm (w8, z18, z28, fpm0), + svdot_za32_vg1x2_fpm (w8, z18, z28, fpm0)) + +/* +** dot_w8_z28_z4: +** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svmfloat8x2_t, svdot_za32_mf8_vg1x2_fpm (w8, z28, z4, fpm0), + svdot_za32_vg1x2_fpm (w8, z28, z4, fpm0)) + +/* +** dot_w8p1_z4_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 1, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svmfloat8x2_t, + svdot_za32_mf8_vg1x2_fpm (w8 + 1, z4, z0, fpm0), + svdot_za32_vg1x2_fpm (w8 + 1, z4, z0, fpm0)) + +/* +** dot_w8p2_z4_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 2, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svmfloat8x2_t, + svdot_za32_mf8_vg1x2_fpm (w8 + 2, z4, z0, fpm0), + svdot_za32_vg1x2_fpm (w8 + 2, z4, z0, fpm0)) + +/* +** dot_w11p4_z4_z0: +** msr fpmr, x1 +** fdot za\.s\[w11, 4, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svmfloat8x2_t, + svdot_za32_mf8_vg1x2_fpm (w11 + 4, z4, z0, fpm0), + svdot_za32_vg1x2_fpm (w11 + 4, z4, z0, fpm0)) + +/* +** dot_w8p7_z4_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svmfloat8x2_t, + svdot_za32_mf8_vg1x2_fpm (w8 + 7, z4, z0, fpm0), + svdot_za32_vg1x2_fpm (w8 + 7, z4, z0, fpm0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svmfloat8x2_t, + svdot_za32_mf8_vg1x2_fpm (w8 + 8, z4, 
z4, fpm0), + svdot_za32_vg1x2_fpm (w8 + 8, z4, z4, fpm0)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fdot za\.s\[\1, 0, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svmfloat8x2_t, + svdot_za32_mf8_vg1x2_fpm (w8 - 1, z4, z0, fpm0), + svdot_za32_vg1x2_fpm (w8 - 1, z4, z0, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_mf8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_mf8_vg1x4.c new file mode 100644 index 000000000000..667ba3238dd2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/dot_za32_mf8_vg1x4.c @@ -0,0 +1,166 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f32" + +/* Available variants are + _za32 if __ARM_FEATURE_SME_F8F32 != 0 + void svdot_za32[_mf8]_vg1x4_fpm (uint32_t slice, svmfloat8x4_t zn, svmfloat8x4_t zm, fpm_t fpm) __arm_streaming __arm_inout("za"); */ + +/* +** dot_0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_0_z0_z0, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (0, z0, z0, fpm0), + svdot_za32_vg1x4_fpm (0, z0, z0, fpm0)) + +/* +** dot_w0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fdot za\.s\[\1, 0, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w0_z0_z0, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (w0, z0, z0, fpm0), + svdot_za32_vg1x4_fpm (w0, z0, z0, fpm0)) + +/* +** dot_w8_z0_z4: +** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z4, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (w8, z0, z4, fpm0), + svdot_za32_vg1x4_fpm (w8, z0, z4, fpm0)) + +/* +** dot_w8_z4_z18: +** msr fpmr, x1 +** mov [^\n]+ 
+** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fdot za\.s\[w8, 0, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z4_z18, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (w8, z4, z18, fpm0), + svdot_za32_vg1x4_fpm (w8, z4, z18, fpm0)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** dot_w8_z0_z23: +** msr fpmr, x1 +** ... +** fdot za\.s\[w8, 0, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (dot_w8_z0_z23, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (w8, z0, z23, fpm0), + svdot_za32_vg1x4_fpm (w8, z0, z23, fpm0)) + +/* +** dot_w8_z23_z0: +** msr fpmr, x1 +** ... +** fdot za\.s\[w8, 0, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z23_z0, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (w8, z23, z0, fpm0), + svdot_za32_vg1x4_fpm (w8, z23, z0, fpm0)) + +/* +** dot_w8_z18_z28: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fdot za\.s\[w8, 0, vgx4\], {z24\.b - z27\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z18_z28, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (w8, z18, z28, fpm0), + svdot_za32_vg1x4_fpm (w8, z18, z28, fpm0)) + +/* +** dot_w8_z28_z4: +** msr fpmr, x1 +** fdot za\.s\[w8, 0, vgx4\], {z28\.b - z31\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8_z28_z4, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (w8, z28, z4, fpm0), + svdot_za32_vg1x4_fpm (w8, z28, z4, fpm0)) + +/* +** dot_w8p1_z4_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 1, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p1_z4_z0, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (w8 + 1, z4, z0, fpm0), + svdot_za32_vg1x4_fpm (w8 + 1, z4, z0, fpm0)) + +/* +** dot_w8p2_z4_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 2, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p2_z4_z0, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (w8 + 2, z4, z0, fpm0), + svdot_za32_vg1x4_fpm (w8 + 2, z4, z0, fpm0)) + +/* +** dot_w11p4_z4_z0: +** msr fpmr, x1 +** fdot 
za\.s\[w11, 4, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w11p4_z4_z0, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (w11 + 4, z4, z0, fpm0), + svdot_za32_vg1x4_fpm (w11 + 4, z4, z0, fpm0)) + +/* +** dot_w8p7_z4_z0: +** msr fpmr, x1 +** fdot za\.s\[w8, 7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p7_z4_z0, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (w8 + 7, z4, z0, fpm0), + svdot_za32_vg1x4_fpm (w8 + 7, z4, z0, fpm0)) + +/* +** dot_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (dot_w8p8_z4_z4, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (w8 + 8, z4, z4, fpm0), + svdot_za32_vg1x4_fpm (w8 + 8, z4, z4, fpm0)) + +/* +** dot_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fdot za\.s\[\1, 0, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (dot_w8m1_z4_z0, svmfloat8x4_t, + svdot_za32_mf8_vg1x4_fpm (w8 - 1, z4, z0, fpm0), + svdot_za32_vg1x4_fpm (w8 - 1, z4, z0, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za16_mf8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za16_mf8_vg1x2.c new file mode 100644 index 000000000000..a25c64be59d3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdot_lane_za16_mf8_vg1x2.c @@ -0,0 +1,119 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f16_ok } } } */ +/* { dg-do compile { target { ! 
{ aarch64_asm_sme-f8f16_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f16" + +/* Available variants are: + _za16 if __ARM_FEATURE_SME_F8F16 != 0 + void svvdot_lane_za16[_mf8]_vg1x2_fpm (uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, uint64_t imm_idx, fpm_t fpm) __arm_streaming __arm_inout("za"); */ + +/* +** dot_lane_0_z0_z4_0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fvdot za\.h\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_0_z0_z4_0, svmfloat8x2_t, svmfloat8_t, + svvdot_lane_za16_mf8_vg1x2_fpm (0, z0, z4, 0, fpm0), + svvdot_lane_za16_vg1x2_fpm (0, z0, z4, 0, fpm0)) + +/* +** dot_lane_w0_z0_z7_1: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fvdot za\.h\[\1, 0, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0_z0_z7_1, svmfloat8x2_t, svmfloat8_t, + svvdot_lane_za16_mf8_vg1x2_fpm (w0, z0, z7, 1, fpm0), + svvdot_lane_za16_vg1x2_fpm (w0, z0, z7, 1, fpm0)) + +/* +** dot_lane_w8_z28_z4_2: +** msr fpmr, x1 +** fvdot za\.h\[w8, 0, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z4_2, svmfloat8x2_t, svmfloat8_t, + svvdot_lane_za16_mf8_vg1x2_fpm (w8, z28, z4, 2, fpm0), + svvdot_lane_za16_vg1x2_fpm (w8, z28, z4, 2, fpm0)) + +/* +** dot_lane_w8p7_z0_z4_3: +** msr fpmr, x1 +** fvdot za\.h\[w8, 7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p7_z0_z4_3, svmfloat8x2_t, svmfloat8_t, + svvdot_lane_za16_mf8_vg1x2_fpm (w8 + 7, z0, z4, 3, fpm0), + svvdot_lane_za16_vg1x2_fpm (w8 + 7, z0, z4, 3, fpm0)) + +/* +** dot_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fvdot za\.h\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8p8_z0_z4_0, svmfloat8x2_t, svmfloat8_t, + svvdot_lane_za16_mf8_vg1x2_fpm (w8 + 8, z0, z4, 0, fpm0), + svvdot_lane_za16_vg1x2_fpm (w8 + 8, z0, z4, 0, fpm0)) + +/* +** 
dot_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fvdot za\.h\[\1, 0, vgx2\], {z0\.b - z1\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w0m1_z0_z4_1, svmfloat8x2_t, svmfloat8_t, + svvdot_lane_za16_mf8_vg1x2_fpm (w0 - 1, z0, z4, 1, fpm0), + svvdot_lane_za16_vg1x2_fpm (w0 - 1, z0, z4, 1, fpm0)) + +/* +** dot_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! +** msr fpmr, x1 +** fvdot za\.h\[w8, 0, vgx2\], {z4\.b - z5\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dot_lane_w8_z4_z15_2, svmfloat8x2_t, svmfloat8_t, + svvdot_lane_za16_mf8_vg1x2_fpm (w8, z4, z15, 2, fpm0), + svvdot_lane_za16_vg1x2_fpm (w8, z4, z15, 2, fpm0)) + +/* +** dot_lane_w8_z28_z16_3: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fvdot za\.h\[w8, 0, vgx2\], {z28\.b - z29\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z28_z16_3, svmfloat8x2_t, svmfloat8_t, + svvdot_lane_za16_mf8_vg1x2_fpm (w8, z28, z16, 3, fpm0), + svvdot_lane_za16_vg1x2_fpm (w8, z28, z16, 3, fpm0)) + +/* +** dot_lane_w8_z17_z7_0: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** fvdot za\.h\[w8, 0, vgx2\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z17_z7_0, svmfloat8x2_t, svmfloat8_t, + svvdot_lane_za16_mf8_vg1x2_fpm (w8, z17, z7, 0, fpm0), + svvdot_lane_za16_vg1x2_fpm (w8, z17, z7, 0, fpm0)) + +/* +** dot_lane_w8_z22_z4_1: +** msr fpmr, x1 +** fvdot za\.h\[w8, 0, vgx2\], {z22\.b - z23\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dot_lane_w8_z22_z4_1, svmfloat8x2_t, svmfloat8_t, + svvdot_lane_za16_mf8_vg1x2_fpm (w8, z22, z4, 1, fpm0), + svvdot_lane_za16_vg1x2_fpm (w8, z22, z4, 1, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdotb_lane_za32_mf8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdotb_lane_za32_mf8_vg1x4.c new file mode 100644 index 000000000000..8b2987f91a18 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdotb_lane_za32_mf8_vg1x4.c @@ -0,0 +1,119 @@ +/* { dg-do assemble { target { 
aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f32" + +/* Available variants are: + _za32 if __ARM_FEATURE_SME_F8F32 != 0 + void svvdotb_lane_za32[_mf8]_vg1x4_fpm (uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, uint64_t imm_idx, fpm_t fpm) __arm_streaming __arm_inout("za"); */ + +/* +** dotb_lane_0_z0_z4_0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fvdotb za\.s\[\1, 0, vgx4\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dotb_lane_0_z0_z4_0, svmfloat8x2_t, svmfloat8_t, + svvdotb_lane_za32_mf8_vg1x4_fpm (0, z0, z4, 0, fpm0), + svvdotb_lane_za32_vg1x4_fpm (0, z0, z4, 0, fpm0)) + +/* +** dotb_lane_w0_z0_z7_1: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fvdotb za\.s\[\1, 0, vgx4\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dotb_lane_w0_z0_z7_1, svmfloat8x2_t, svmfloat8_t, + svvdotb_lane_za32_mf8_vg1x4_fpm (w0, z0, z7, 1, fpm0), + svvdotb_lane_za32_vg1x4_fpm (w0, z0, z7, 1, fpm0)) + +/* +** dotb_lane_w8_z28_z4_2: +** msr fpmr, x1 +** fvdotb za\.s\[w8, 0, vgx4\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (dotb_lane_w8_z28_z4_2, svmfloat8x2_t, svmfloat8_t, + svvdotb_lane_za32_mf8_vg1x4_fpm (w8, z28, z4, 2, fpm0), + svvdotb_lane_za32_vg1x4_fpm (w8, z28, z4, 2, fpm0)) + +/* +** dotb_lane_w8p7_z0_z4_3: +** msr fpmr, x1 +** fvdotb za\.s\[w8, 7, vgx4\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dotb_lane_w8p7_z0_z4_3, svmfloat8x2_t, svmfloat8_t, + svvdotb_lane_za32_mf8_vg1x4_fpm (w8 + 7, z0, z4, 3, fpm0), + svvdotb_lane_za32_vg1x4_fpm (w8 + 7, z0, z4, 3, fpm0)) + +/* +** dotb_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fvdotb za\.s\[\1, 0, vgx4\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dotb_lane_w8p8_z0_z4_0, svmfloat8x2_t, svmfloat8_t, + svvdotb_lane_za32_mf8_vg1x4_fpm (w8 + 8, 
z0, z4, 0, fpm0), + svvdotb_lane_za32_vg1x4_fpm (w8 + 8, z0, z4, 0, fpm0)) + +/* +** dotb_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fvdotb za\.s\[\1, 0, vgx4\], {z0\.b - z1\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dotb_lane_w0m1_z0_z4_1, svmfloat8x2_t, svmfloat8_t, + svvdotb_lane_za32_mf8_vg1x4_fpm (w0 - 1, z0, z4, 1, fpm0), + svvdotb_lane_za32_vg1x4_fpm (w0 - 1, z0, z4, 1, fpm0)) + +/* +** dotb_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! +** msr fpmr, x1 +** fvdotb za\.s\[w8, 0, vgx4\], {z4\.b - z5\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dotb_lane_w8_z4_z15_2, svmfloat8x2_t, svmfloat8_t, + svvdotb_lane_za32_mf8_vg1x4_fpm (w8, z4, z15, 2, fpm0), + svvdotb_lane_za32_vg1x4_fpm (w8, z4, z15, 2, fpm0)) + +/* +** dotb_lane_w8_z28_z16_3: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fvdotb za\.s\[w8, 0, vgx4\], {z28\.b - z29\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dotb_lane_w8_z28_z16_3, svmfloat8x2_t, svmfloat8_t, + svvdotb_lane_za32_mf8_vg1x4_fpm (w8, z28, z16, 3, fpm0), + svvdotb_lane_za32_vg1x4_fpm (w8, z28, z16, 3, fpm0)) + +/* +** dotb_lane_w8_z17_z7_0: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** fvdotb za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dotb_lane_w8_z17_z7_0, svmfloat8x2_t, svmfloat8_t, + svvdotb_lane_za32_mf8_vg1x4_fpm (w8, z17, z7, 0, fpm0), + svvdotb_lane_za32_vg1x4_fpm (w8, z17, z7, 0, fpm0)) + +/* +** dotb_lane_w8_z22_z4_1: +** msr fpmr, x1 +** fvdotb za\.s\[w8, 0, vgx4\], {z22\.b - z23\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dotb_lane_w8_z22_z4_1, svmfloat8x2_t, svmfloat8_t, + svvdotb_lane_za32_mf8_vg1x4_fpm (w8, z22, z4, 1, fpm0), + svvdotb_lane_za32_vg1x4_fpm (w8, z22, z4, 1, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdott_lane_za32_mf8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdott_lane_za32_mf8_vg1x4.c new file mode 100644 index 000000000000..4d9a8e4078f6 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/vdott_lane_za32_mf8_vg1x4.c @@ -0,0 +1,119 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sve2,+sme-f8f32" + +/* Available variants are: + _za32 if __ARM_FEATURE_SME_F8F32 != 0 + void svvdott_lane_za32[_mf8]_vg1x4_fpm (uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, uint64_t imm_idx, fpm_t fpm) __arm_streaming __arm_inout("za"); */ + +/* +** dott_lane_0_z0_z4_0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fvdott za\.s\[\1, 0, vgx4\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dott_lane_0_z0_z4_0, svmfloat8x2_t, svmfloat8_t, + svvdott_lane_za32_mf8_vg1x4_fpm (0, z0, z4, 0, fpm0), + svvdott_lane_za32_vg1x4_fpm (0, z0, z4, 0, fpm0)) + +/* +** dott_lane_w0_z0_z7_1: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fvdott za\.s\[\1, 0, vgx4\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dott_lane_w0_z0_z7_1, svmfloat8x2_t, svmfloat8_t, + svvdott_lane_za32_mf8_vg1x4_fpm (w0, z0, z7, 1, fpm0), + svvdott_lane_za32_vg1x4_fpm (w0, z0, z7, 1, fpm0)) + +/* +** dott_lane_w8_z28_z4_2: +** msr fpmr, x1 +** fvdott za\.s\[w8, 0, vgx4\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (dott_lane_w8_z28_z4_2, svmfloat8x2_t, svmfloat8_t, + svvdott_lane_za32_mf8_vg1x4_fpm (w8, z28, z4, 2, fpm0), + svvdott_lane_za32_vg1x4_fpm (w8, z28, z4, 2, fpm0)) + +/* +** dott_lane_w8p7_z0_z4_3: +** msr fpmr, x1 +** fvdott za\.s\[w8, 7, vgx4\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dott_lane_w8p7_z0_z4_3, svmfloat8x2_t, svmfloat8_t, + svvdott_lane_za32_mf8_vg1x4_fpm (w8 + 7, z0, z4, 3, fpm0), + svvdott_lane_za32_vg1x4_fpm (w8 + 7, z0, z4, 3, fpm0)) + +/* +** dott_lane_w8p8_z0_z4_0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fvdott za\.s\[\1, 0, vgx4\], {z0\.b - z1\.b}, 
z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dott_lane_w8p8_z0_z4_0, svmfloat8x2_t, svmfloat8_t, + svvdott_lane_za32_mf8_vg1x4_fpm (w8 + 8, z0, z4, 0, fpm0), + svvdott_lane_za32_vg1x4_fpm (w8 + 8, z0, z4, 0, fpm0)) + +/* +** dott_lane_w0m1_z0_z4_1: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fvdott za\.s\[\1, 0, vgx4\], {z0\.b - z1\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dott_lane_w0m1_z0_z4_1, svmfloat8x2_t, svmfloat8_t, + svvdott_lane_za32_mf8_vg1x4_fpm (w0 - 1, z0, z4, 1, fpm0), + svvdott_lane_za32_vg1x4_fpm (w0 - 1, z0, z4, 1, fpm0)) + +/* +** dott_lane_w8_z4_z15_2: +** str d15, \[sp, #?-16\]! +** msr fpmr, x1 +** fvdott za\.s\[w8, 0, vgx4\], {z4\.b - z5\.b}, z15\.b\[2\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (dott_lane_w8_z4_z15_2, svmfloat8x2_t, svmfloat8_t, + svvdott_lane_za32_mf8_vg1x4_fpm (w8, z4, z15, 2, fpm0), + svvdott_lane_za32_vg1x4_fpm (w8, z4, z15, 2, fpm0)) + +/* +** dott_lane_w8_z28_z16_3: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fvdott za\.s\[w8, 0, vgx4\], {z28\.b - z29\.b}, \1\.b\[3\] +** ret +*/ +TEST_ZA_LANE (dott_lane_w8_z28_z16_3, svmfloat8x2_t, svmfloat8_t, + svvdott_lane_za32_mf8_vg1x4_fpm (w8, z28, z16, 3, fpm0), + svvdott_lane_za32_vg1x4_fpm (w8, z28, z16, 3, fpm0)) + +/* +** dott_lane_w8_z17_z7_0: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** fvdott za\.s\[w8, 0, vgx4\], [^\n]+, z7\.b\[0\] +** ret +*/ +TEST_ZA_LANE (dott_lane_w8_z17_z7_0, svmfloat8x2_t, svmfloat8_t, + svvdott_lane_za32_mf8_vg1x4_fpm (w8, z17, z7, 0, fpm0), + svvdott_lane_za32_vg1x4_fpm (w8, z17, z7, 0, fpm0)) + +/* +** dott_lane_w8_z22_z4_1: +** msr fpmr, x1 +** fvdott za\.s\[w8, 0, vgx4\], {z22\.b - z23\.b}, z4\.b\[1\] +** ret +*/ +TEST_ZA_LANE (dott_lane_w8_z22_z4_1, svmfloat8x2_t, svmfloat8_t, + svvdott_lane_za32_mf8_vg1x4_fpm (w8, z22, z4, 1, fpm0), + svvdott_lane_za32_vg1x4_fpm (w8, z22, z4, 1, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_half_za_slice_lane_fpm.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_half_za_slice_lane_fpm.c new file mode 100644 index 000000000000..69ba85b63982 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dot_half_za_slice_lane_fpm.c @@ -0,0 +1,106 @@ +// { dg-options "-std=c23 -fsyntax-only" } +// { dg-do compile } + +#pragma GCC target "+sve2,+sme-f8f32" +static_assert (__ARM_FEATURE_SME_F8F32 == 1); +#include <arm_sme.h> + +/* Available variants are: + _za32 if __ARM_FEATURE_SME_F8F32 != 0 + void svvdotb_lane_za32[_mf8]_vg1x4_fpm(uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, uint64_t imm_idx, fpm_t fpm) __arm_streaming __arm_inout("za"); + void svvdott_lane_za32[_mf8]_vg1x4_fpm(uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, uint64_t imm_idx, fpm_t fpm) __arm_streaming __arm_inout("za"); */ + +void +svvdotb_lane_ok (uint32_t slice, svmfloat8x2_t zn_f8x2, svmfloat8_t zm_f8, + fpm_t fpm) __arm_streaming __arm_inout ("za") +{ + svvdotb_lane_za32_mf8_vg1x4_fpm (slice, zn_f8x2, zm_f8, 0, fpm); + svvdotb_lane_za32_vg1x4_fpm (slice, zn_f8x2, zm_f8, 0, fpm); +} + +void +svvdotb_lane_error_not_streaming (uint32_t slice, svmfloat8x2_t zn_f8x2, + svmfloat8_t zm_f8, + fpm_t fpm) __arm_inout ("za") +{ + svvdotb_lane_za32_mf8_vg1x4_fpm (slice, zn_f8x2, zm_f8, 0, fpm); // { dg-error {ACLE function 'svvdotb_lane_za32_mf8_vg1x4_fpm' can only be called when SME streaming mode is enabled} } + svvdotb_lane_za32_vg1x4_fpm (slice, zn_f8x2, zm_f8, 0, fpm); // { dg-error {ACLE function 'svvdotb_lane_za32_mf8_vg1x4_fpm' can only be called when SME streaming mode is enabled} } +} + +void +svvdotb_lane_error_streaming_compatible (uint32_t slice, svmfloat8x2_t zn_f8x2, + svmfloat8_t zm_f8, + fpm_t fpm) __arm_streaming_compatible +{ + svvdotb_lane_za32_mf8_vg1x4_fpm (slice, zn_f8x2, zm_f8, 0, fpm); // { dg-error {ACLE function 'svvdotb_lane_za32_mf8_vg1x4_fpm' can only be called when SME streaming mode is enabled} } + svvdotb_lane_za32_vg1x4_fpm (slice, zn_f8x2, zm_f8, 0, fpm); // 
{ dg-error {ACLE function 'svvdotb_lane_za32_mf8_vg1x4_fpm' can only be called when SME streaming mode is enabled} } +} + +void +svvdotb_lane_error_not_inout (uint32_t slice, svmfloat8x2_t zn_f8x2, + svmfloat8_t zm_f8, fpm_t fpm) __arm_streaming +{ + svvdotb_lane_za32_mf8_vg1x4_fpm (slice, zn_f8x2, zm_f8, 0, fpm); // { dg-error {ACLE function 'svvdotb_lane_za32_mf8_vg1x4_fpm' can only be called from a function that has 'za' state} } + svvdotb_lane_za32_vg1x4_fpm (slice, zn_f8x2, zm_f8, 0, fpm); // { dg-error {ACLE function 'svvdotb_lane_za32_mf8_vg1x4_fpm' can only be called from a function that has 'za' state} } +} + +void +svvdotb_lane_error_arg_count_mismatch ( + uint32_t slice, svmfloat8x2_t zn_f8x2, svmfloat8_t zm_f8, + fpm_t fpm) __arm_streaming __arm_inout ("za") +{ + svvdotb_lane_za32_mf8_vg1x4_fpm (); // { dg-error {too few arguments to function 'svvdotb_lane_za32_mf8_vg1x4_fpm'; expected 5, have 0} } + svvdotb_lane_za32_vg1x4_fpm (); // { dg-error {too few arguments to function 'svvdotb_lane_za32_vg1x4_fpm'} } + + svvdotb_lane_za32_mf8_vg1x4_fpm (slice, zn_f8x2, zm_f8, 0, fpm, 0); // { dg-error {too many arguments to function 'svvdotb_lane_za32_mf8_vg1x4_fpm'; expected 5, have 6} } + svvdotb_lane_za32_vg1x4_fpm (slice, zn_f8x2, zm_f8, 0, fpm, 0); // { dg-error {too many arguments to function 'svvdotb_lane_za32_vg1x4_fpm'} } +} + +void +svvdotb_lane_error_arg_type_mismatch ( + uint32_t slice, svmfloat8x2_t zn_f8x2, svmfloat8x4_t zn_f8x4, + svmfloat8_t zm_f8, fpm_t fpm) __arm_streaming __arm_inout ("za") +{ + svvdotb_lane_za32_mf8_vg1x4_fpm (zm_f8, zn_f8x2, zm_f8, 0, fpm); // { dg-error {incompatible type for argument 1 of 'svvdotb_lane_za32_mf8_vg1x4_fpm'} } + svvdotb_lane_za32_vg1x4_fpm (zm_f8, zn_f8x2, zm_f8, 0, fpm); // { dg-error {passing 'svmfloat8_t' to argument 1 of 'svvdotb_lane_za32_vg1x4_fpm', which expects 'uint32_t'} } + + svvdotb_lane_za32_mf8_vg1x4_fpm (slice, zn_f8x4, zm_f8, 0, fpm); // { dg-error {incompatible type for argument 2 of 
'svvdotb_lane_za32_mf8_vg1x4_fpm'} } + svvdotb_lane_za32_vg1x4_fpm (slice, zn_f8x4, zm_f8, 0, fpm); // { dg-error {passing 'svmfloat8x4_t' to argument 2 of 'svvdotb_lane_za32_vg1x4_fpm', which expects a tuple of 2 vectors} } + + svvdotb_lane_za32_mf8_vg1x4_fpm (slice, zn_f8x2, zn_f8x2, 0, fpm); // { dg-error {incompatible type for argument 3 of 'svvdotb_lane_za32_mf8_vg1x4_fpm'} } + svvdotb_lane_za32_vg1x4_fpm (slice, zn_f8x2, zn_f8x2, 0, fpm); // { dg-error {passing 'svmfloat8x2_t' to argument 3 of 'svvdotb_lane_za32_vg1x4_fpm', which expects 'svmfloat8_t'} } + + svvdotb_lane_za32_mf8_vg1x4_fpm (slice, zn_f8x2, zm_f8, zm_f8, fpm); // { dg-error {incompatible type for argument 4 of 'svvdotb_lane_za32_mf8_vg1x4_fpm'} } + svvdotb_lane_za32_vg1x4_fpm (slice, zn_f8x2, zm_f8, zm_f8, fpm); // { dg-error {argument 4 of 'svvdotb_lane_za32_vg1x4_fpm' must be an integer constant expression} } + + svvdotb_lane_za32_mf8_vg1x4_fpm (slice, zn_f8x2, zm_f8, 0, zm_f8); // { dg-error {incompatible type for argument 5 of 'svvdotb_lane_za32_mf8_vg1x4_fpm'} } + svvdotb_lane_za32_vg1x4_fpm (slice, zn_f8x2, zm_f8, 0, zm_f8); // { dg-error {incompatible type for argument 5 of 'svvdotb_lane_za32_mf8_vg1x4_fpm'} } +} + +void +svvdotb_lane_error_imm_idx_not_immediate ( + uint32_t slice, svmfloat8x2_t zn_f8x2, svmfloat8_t zm_f8, uint64_t imm_idx, + fpm_t fpm) __arm_streaming __arm_in ("zt0") +{ + svvdotb_lane_za32_mf8_vg1x4_fpm (slice, zn_f8x2, zm_f8, imm_idx, fpm); // { dg-error {argument 4 of 'svvdotb_lane_za32_mf8_vg1x4_fpm' must be an integer constant expression} } + svvdotb_lane_za32_vg1x4_fpm (slice, zn_f8x2, zm_f8, imm_idx, fpm); // { dg-error {argument 4 of 'svvdotb_lane_za32_vg1x4_fpm' must be an integer constant expression} } +} + +void +svvdotb_lane_error_imm_idx_not_in_range ( + uint32_t slice, svmfloat8x2_t zn_f8x2, svmfloat8_t zm_f8, + fpm_t fpm) __arm_streaming __arm_in ("zt0") +{ + svvdotb_lane_za32_mf8_vg1x4_fpm (slice, zn_f8x2, zm_f8, -1, fpm); // { dg-error {passing -1 to 
argument 4 of 'svvdotb_lane_za32_mf8_vg1x4_fpm', which expects a value in the range \[0, 3\]} } + svvdotb_lane_za32_vg1x4_fpm (slice, zn_f8x2, zm_f8, -1, fpm); // { dg-error {passing -1 to argument 4 of 'svvdotb_lane_za32_vg1x4_fpm', which expects a value in the range \[0, 3\]} } +} + +#pragma GCC reset_options +#pragma GCC target("+sve2,+sme2") +void +svvdotb_lane_feature_not_enabled (uint32_t slice, svmfloat8x2_t zn_f8x2, + svmfloat8_t zm_f8, + fpm_t fpm) __arm_streaming __arm_inout ("za") +{ + svvdotb_lane_za32_mf8_vg1x4_fpm (slice, zn_f8x2, zm_f8, 0, fpm); // { dg-error {ACLE function 'svvdotb_lane_za32_mf8_vg1x4_fpm' requires ISA extension 'sme-f8f32'} } +}
