https://gcc.gnu.org/g:013c867f1356e2a7848eccc92e62880155698502

commit r17-980-g013c867f1356e2a7848eccc92e62880155698502
Author: Artemiy Volkov <[email protected]>
Date:   Fri Jan 16 13:09:52 2026 +0000

    aarch64: implement FMUL SME instruction

    The SME2.2 extension introduces the following variants of a new
    streaming-mode instruction:

    - FMUL (Multi-vector floating-point multiply by vector)
    - FMUL (Multi-vector floating-point multiply)

    The first operand is a multi-vector consisting of two or four vectors,
    and the second operand either has the same type, or is a single vector
    of the underlying type.

    New intrinsics are documented in the ACLE manual [0] and are as follows:

    svfloat{16,32,64}x{2,4}_t svmul[_single_f{16,32,64}_x{2,4}]
      (svfloat{16,32,64}x{2,4}_t zd, svfloat{16,32,64}_t zm) __arm_streaming;
    svfloat{16,32,64}x{2,4}_t svmul[_f{16,32,64}_x{2,4}]
      (svfloat{16,32,64}x{2,4}_t zd, svfloat{16,32,64}x{2,4}_t zm) __arm_streaming;

    This patch implements the above changes throughout the SVE builtin
    description files and aarch64-sve2.md.

    [0] https://github.com/ARM-software/acle

    gcc/ChangeLog:

    	* config/aarch64/aarch64-sve-builtins-sve2.def (svmul): Define new
    	SVE function variant.
    	* config/aarch64/aarch64-sve2.md (@aarch64_sve_<optab><mode>): New
    	instruction pattern.
    	(@aarch64_sve_<optab><mode>_single): Likewise.
    	* config/aarch64/aarch64.h (TARGET_STREAMING_SME2p2): New macro.

    gcc/testsuite/ChangeLog:

    	* gcc.target/aarch64/sme2/acle-asm/mul_f16_x2.c: New test.
    	* gcc.target/aarch64/sme2/acle-asm/mul_f16_x4.c: Likewise.
    	* gcc.target/aarch64/sme2/acle-asm/mul_f32_x2.c: Likewise.
    	* gcc.target/aarch64/sme2/acle-asm/mul_f32_x4.c: Likewise.
    	* gcc.target/aarch64/sme2/acle-asm/mul_f64_x2.c: Likewise.
    	* gcc.target/aarch64/sme2/acle-asm/mul_f64_x4.c: Likewise.

Diff: --- gcc/config/aarch64/aarch64-sve-builtins-sve2.def | 4 + gcc/config/aarch64/aarch64-sve2.md | 23 +++ gcc/config/aarch64/aarch64.h | 2 + .../gcc.target/aarch64/sme2/acle-asm/mul_f16_x2.c | 193 ++++++++++++++++++ .../gcc.target/aarch64/sme2/acle-asm/mul_f16_x4.c | 227 +++++++++++++++++++++ .../gcc.target/aarch64/sme2/acle-asm/mul_f32_x2.c | 193 ++++++++++++++++++ .../gcc.target/aarch64/sme2/acle-asm/mul_f32_x4.c | 227 +++++++++++++++++++++ .../gcc.target/aarch64/sme2/acle-asm/mul_f64_x2.c | 193 ++++++++++++++++++ .../gcc.target/aarch64/sme2/acle-asm/mul_f64_x4.c | 227 +++++++++++++++++++++ 9 files changed, 1289 insertions(+) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def index 7fbf9c30708c..a2ad77106748 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -449,6 +449,10 @@ DEF_SVE_FUNCTION_GS_FPM (svcvtl2, unary_convert, cvt_mf8, x2, none, set) DEF_SVE_FUNCTION_GS (svscale, binary_int_opt_single_n, all_float, x24, none) #undef REQUIRED_EXTENSIONS +#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2p2) +DEF_SVE_FUNCTION_GS (svmul, binary_opt_single_n, all_float, x24, none) +#undef REQUIRED_EXTENSIONS + #define REQUIRED_EXTENSIONS \ nonstreaming_sve (AARCH64_FL_SVE2 | AARCH64_FL_F8F16MM) DEF_SVE_FUNCTION_GS_FPM (svmmla, mmla, h_float_mf8, none, none, set) diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index d9ad7689ff2f..4f842a61d8b3 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -1593,6 +1593,7 @@ ;; ------------------------------------------------------------------------- ;; Includes the multiple and single vector and multiple vectors forms of ;; - BFMUL (SVE_BFSCALE) +;; - FMUL (SME2p2) ;; ------------------------------------------------------------------------- ;; BFMUL (multiple vectors) @@ -1625,6 +1626,28 @@ "bfmul\t%0, %1, %2.h" ) 
+;; FMUL (multiple vectors) +(define_insn "@aarch64_sve_<optab><mode>" + [(set (match_operand:SVE_Fx24_NOBF 0 "register_operand" "=Uw<vector_count>") + (unspec:SVE_Fx24_NOBF + [(match_operand:SVE_Fx24_NOBF 1 "register_operand" "Uw<vector_count>") + (match_operand:SVE_Fx24_NOBF 2 "register_operand" "Uw<vector_count>")] + SVE_FP_MUL))] + "TARGET_STREAMING_SME2p2" + "fmul\t%0, %1, %2" +) + +;; FMUL (multiple x single vector) +(define_insn "@aarch64_sve_<optab><mode>_single" + [(set (match_operand:SVE_Fx24_NOBF 0 "register_operand" "=Uw<vector_count>") + (unspec:SVE_Fx24_NOBF + [(match_operand:SVE_Fx24_NOBF 1 "register_operand" "Uw<vector_count>") + (match_operand:<VSINGLE> 2 "register_operand" "x")] + SVE_FP_MUL))] + "TARGET_STREAMING_SME2p2" + "fmul\t%0, %1, %2.<Vetype>" +) + ;; ========================================================================= ;; == Uniform ternary arithmnetic ;; ========================================================================= diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 0ef1ec49e6fa..5bd3379cb0db 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -354,6 +354,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED #define TARGET_STREAMING_SME2p1 (TARGET_STREAMING && AARCH64_HAVE_ISA (SME2p1)) +#define TARGET_STREAMING_SME2p2 (TARGET_STREAMING && AARCH64_HAVE_ISA (SME2p2)) + #define TARGET_SME_B16B16 AARCH64_HAVE_ISA (SME_B16B16) /* ARMv8.3-A features. */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f16_x2.c new file mode 100644 index 000000000000..7e8063d0b2d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f16_x2.c @@ -0,0 +1,193 @@ +/* { dg-do assemble { target aarch64_asm_sme2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme2p2" + +/* +** mul_z0_z0_z4: +** fmul {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (mul_z0_z0_z4, svfloat16x2_t, z0, + svmul_f16_x2 (z0, z4), + svmul (z0, z4)) + +/* +** mul_z0_z4_z0: +** fmul {z0\.h - z1\.h}, {z4\.h - z5\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_XN (mul_z0_z4_z0, svfloat16x2_t, z0, + svmul_f16_x2 (z4, z0), + svmul (z4, z0)) + +/* +** mul_z0_z4_z28: +** fmul {z0\.h - z1\.h}, {z4\.h - z5\.h}, {z28\.h - z29\.h} +** ret +*/ +TEST_XN (mul_z0_z4_z28, svfloat16x2_t, z0, + svmul_f16_x2 (z4, z28), + svmul (z4, z28)) + +/* +** mul_z18_z18_z4: +** fmul {z18\.h - z19\.h}, {z18\.h - z19\.h}, {z4\.h - z5\.h} +** ret +*/ +TEST_XN (mul_z18_z18_z4, svfloat16x2_t, z18, + svmul_f16_x2 (z18, z4), + svmul (z18, z4)) + +/* +** mul_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fmul [^\n]+, {z18\.h - z19\.h} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (mul_z23_z23_z18, svfloat16x2_t, z23, + svmul_f16_x2 (z23, z18), + svmul (z23, z18)) + +/* +** mul_z28_z28_z0: +** fmul {z28\.h - z29\.h}, {z28\.h - z29\.h}, {z0\.h - z1\.h} +** ret +*/ +TEST_XN (mul_z28_z28_z0, svfloat16x2_t, z28, + svmul_f16_x2 (z28, z0), + svmul (z28, z0)) + +/* +** mul_z0_z0_z18: +** fmul {z0\.h - z1\.h}, {z0\.h - z1\.h}, {z18\.h - z19\.h} +** ret +*/ +TEST_XN (mul_z0_z0_z18, svfloat16x2_t, z0, + svmul_f16_x2 (z0, z18), + svmul (z0, z18)) + +/* +** mul_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmul {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** | +** fmul {z4\.h - z5\.h}, {z4\.h - z5\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (mul_z4_z4_z23, svfloat16x2_t, z4, + svmul_f16_x2 (z4, z23), + svmul (z4, z23)) + +/* +** mul_single_z24_z24_z0: +** fmul {z24\.h - z25\.h}, {z24\.h - z25\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z24_z0, svfloat16x2_t, svfloat16_t, z24, + 
svmul_single_f16_x2 (z24, z0), + svmul (z24, z0)) + +/* +** mul_single_z24_z28_z0: +** fmul {z24\.h - z25\.h}, {z28\.h - z29\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z28_z0, svfloat16x2_t, svfloat16_t, z24, + svmul_single_f16_x2 (z28, z0), + svmul (z28, z0)) + +/* +** mul_single_z24_z1_z0: +** ( +** mov z30\.d, z1\.d +** mov z31\.d, z2\.d +** | +** mov z31\.d, z2\.d +** mov z30\.d, z1\.d +** ) +** fmul {z24\.h - z25\.h}, {z30\.h - z31\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z1_z0, svfloat16x2_t, svfloat16_t, z24, + svmul_single_f16_x2 (z1, z0), + svmul (z1, z0)) + +/* +** mul_single_z1_z24_z0: +** fmul {z30\.h - z31\.h}, {z24\.h - z25\.h}, z0\.h +** ( +** mov z2\.d, z31\.d +** mov z1\.d, z30\.d +** | +** mov z1\.d, z30\.d +** mov z2\.d, z31\.d +** ) +** ret +*/ +TEST_XN_SINGLE (mul_single_z1_z24_z0, svfloat16x2_t, svfloat16_t, z1, + svmul_single_f16_x2 (z24, z0), + svmul (z24, z0)) + +/* +** mul_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fmul ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (mul_single_z1_z1_z0, svfloat16x2_t, svfloat16_t, z1, + svmul_single_f16_x2 (z1, z0), + svmul (z1, z0)) + +/* +** mul_single_z18_z18_z0: +** fmul {z18\.h - z19\.h}, {z18\.h - z19\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (mul_single_z18_z18_z0, svfloat16x2_t, svfloat16_t, z18, + svmul_single_f16_x2 (z18, z0), + svmul (z18, z0)) + +/* +** mul_single_awkward: +** ... +** fmul {z0\.h - z1\.h}, {z30\.h - z31\.h}, z[0-9]+\.h +** ret +*/ +TEST_XN_SINGLE_AWKWARD (mul_single_awkward, svfloat16x2_t, svfloat16_t, + z0_res = svmul_single_f16_x2 (z1, z0), + z0_res = svmul (z1, z0)) + +/* +** mul_single_z0_z0_z15: +** ... +** fmul {z0\.h - z1\.h}, {z0\.h - z1\.h}, z15\.h +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (mul_single_z0_z0_z15, svfloat16x2_t, svfloat16_t, + z0 = svmul_single_f16_x2 (z0, z15), + z0 = svmul (z0, z15)) + +/* +** mul_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmul {z24\.h - z25\.h}, {z24\.h - z25\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z24_z16, svfloat16x2_t, svfloat16_t, z24, + svmul_single_f16_x2 (z24, z16), + svmul (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f16_x4.c new file mode 100644 index 000000000000..8478f8a0acda --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f16_x4.c @@ -0,0 +1,227 @@ +/* { dg-do assemble { target aarch64_asm_sme2p2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme2p2" + +/* +** mul_z0_z0_z4: +** fmul {z0\.h - z3\.h}, {z0\.h - z3\.h}, {z4\.h - z7\.h} +** ret +*/ +TEST_XN (mul_z0_z0_z4, svfloat16x4_t, z0, + svmul_f16_x4 (z0, z4), + svmul (z0, z4)) + +/* +** mul_z0_z4_z0: +** fmul {z0\.h - z3\.h}, {z4\.h - z7\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (mul_z0_z4_z0, svfloat16x4_t, z0, + svmul_f16_x4 (z4, z0), + svmul (z4, z0)) + +/* +** mul_z0_z4_z28: +** fmul {z0\.h - z3\.h}, {z4\.h - z7\.h}, {z28\.h - z31\.h} +** ret +*/ +TEST_XN (mul_z0_z4_z28, svfloat16x4_t, z0, + svmul_f16_x4 (z4, z28), + svmul (z4, z28)) + +/* +** mul_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul [^\n]+, {z4\.h - z7\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (mul_z18_z18_z4, svfloat16x4_t, z18, + svmul_f16_x4 (z18, z4), + svmul (z18, z4)) + +/* +** mul_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul [^\n]+, {z28\.h - z31\.h} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (mul_z23_z23_z28, 
svfloat16x4_t, z23, + svmul_f16_x4 (z23, z28), + svmul (z23, z28)) + +/* +** mul_z28_z28_z0: +** fmul {z28\.h - z31\.h}, {z28\.h - z31\.h}, {z0\.h - z3\.h} +** ret +*/ +TEST_XN (mul_z28_z28_z0, svfloat16x4_t, z28, + svmul_f16_x4 (z28, z0), + svmul (z28, z0)) + +/* +** mul_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** | +** fmul {z0\.h - z3\.h}, {z0\.h - z3\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (mul_z0_z0_z18, svfloat16x4_t, z0, + svmul_f16_x4 (z0, z18), + svmul (z0, z18)) + +/* +** mul_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** | +** fmul {z4\.h - z7\.h}, {z4\.h - z7\.h}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (mul_z4_z4_z23, svfloat16x4_t, z4, + svmul_f16_x4 (z4, z23), + svmul (z4, z23)) + +/* +** mul_single_z24_z24_z0: +** fmul {z24\.h - z27\.h}, {z24\.h - z27\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z24_z0, svfloat16x4_t, svfloat16_t, z24, + svmul_single_f16_x4 (z24, z0), + svmul (z24, z0)) + +/* +** mul_single_z24_z28_z0: +** fmul {z24\.h - z27\.h}, {z28\.h - z31\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z28_z0, svfloat16x4_t, svfloat16_t, z24, + svmul_single_f16_x4 (z28, z0), + svmul (z28, z0)) + +/* +** mul_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul {z24\.h - z27\.h}, {z28\.h - z31\.h}, z0\.h +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z1_z0, svfloat16x4_t, svfloat16_t, z24, + svmul_single_f16_x4 (z1, z0), + svmul (z1, z0)) + +/* +** mul_single_z1_z24_z0: +** fmul {z28\.h - z31\.h}, {z24\.h - z27\.h}, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (mul_single_z1_z24_z0, svfloat16x4_t, svfloat16_t, z1, + svmul_single_f16_x4 (z24, z0), + svmul (z24, z0)) + +/* 
+** mul_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul ({z[0-9]+\.h - z[0-9]+\.h}), \1, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (mul_single_z1_z1_z0, svfloat16x4_t, svfloat16_t, z1, + svmul_single_f16_x4 (z1, z0), + svmul (z1, z0)) + +/* +** mul_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul [^\n]+, z0\.h +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (mul_single_z18_z18_z0, svfloat16x4_t, svfloat16_t, z18, + svmul_single_f16_x4 (z18, z0), + svmul (z18, z0)) + +/* +** mul_single_awkward: +** ... +** fmul {z0\.h - z3\.h}, {z[0-9]+\.h - z[0-9]+\.h}, z[0-9]+\.h +** ret +*/ +TEST_XN_SINGLE_AWKWARD (mul_single_awkward, svfloat16x4_t, svfloat16_t, + z0_res = svmul_single_f16_x4 (z1, z0), + z0_res = svmul (z1, z0)) + +/* +** mul_single_z0_z0_z15: +** ... +** fmul {z0\.h - z3\.h}, {z0\.h - z3\.h}, z15\.h +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (mul_single_z0_z0_z15, svfloat16x4_t, svfloat16_t, + z0 = svmul_single_f16_x4 (z0, z15), + z0 = svmul (z0, z15)) + +/* +** mul_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmul {z24\.h - z27\.h}, {z24\.h - z27\.h}, \1\.h +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z24_z16, svfloat16x4_t, svfloat16_t, z24, + svmul_single_f16_x4 (z24, z16), + svmul (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f32_x2.c new file mode 100644 index 000000000000..872409c4f347 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f32_x2.c @@ -0,0 +1,193 @@ +/* { dg-do assemble { target aarch64_asm_sme2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme2p2" + +/* +** mul_z0_z0_z4: +** fmul {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (mul_z0_z0_z4, svfloat32x2_t, z0, + svmul_f32_x2 (z0, z4), + svmul (z0, z4)) + +/* +** mul_z0_z4_z0: +** fmul {z0\.s - z1\.s}, {z4\.s - z5\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_XN (mul_z0_z4_z0, svfloat32x2_t, z0, + svmul_f32_x2 (z4, z0), + svmul (z4, z0)) + +/* +** mul_z0_z4_z28: +** fmul {z0\.s - z1\.s}, {z4\.s - z5\.s}, {z28\.s - z29\.s} +** ret +*/ +TEST_XN (mul_z0_z4_z28, svfloat32x2_t, z0, + svmul_f32_x2 (z4, z28), + svmul (z4, z28)) + +/* +** mul_z18_z18_z4: +** fmul {z18\.s - z19\.s}, {z18\.s - z19\.s}, {z4\.s - z5\.s} +** ret +*/ +TEST_XN (mul_z18_z18_z4, svfloat32x2_t, z18, + svmul_f32_x2 (z18, z4), + svmul (z18, z4)) + +/* +** mul_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fmul [^\n]+, {z18\.s - z19\.s} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (mul_z23_z23_z18, svfloat32x2_t, z23, + svmul_f32_x2 (z23, z18), + svmul (z23, z18)) + +/* +** mul_z28_z28_z0: +** fmul {z28\.s - z29\.s}, {z28\.s - z29\.s}, {z0\.s - z1\.s} +** ret +*/ +TEST_XN (mul_z28_z28_z0, svfloat32x2_t, z28, + svmul_f32_x2 (z28, z0), + svmul (z28, z0)) + +/* +** mul_z0_z0_z18: +** fmul {z0\.s - z1\.s}, {z0\.s - z1\.s}, {z18\.s - z19\.s} +** ret +*/ +TEST_XN (mul_z0_z0_z18, svfloat32x2_t, z0, + svmul_f32_x2 (z0, z18), + svmul (z0, z18)) + +/* +** mul_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmul {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** | +** fmul {z4\.s - z5\.s}, {z4\.s - z5\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (mul_z4_z4_z23, svfloat32x2_t, z4, + svmul_f32_x2 (z4, z23), + svmul (z4, z23)) + +/* +** mul_single_z24_z24_z0: +** fmul {z24\.s - z25\.s}, {z24\.s - z25\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z24_z0, svfloat32x2_t, svfloat32_t, z24, + 
svmul_single_f32_x2 (z24, z0), + svmul (z24, z0)) + +/* +** mul_single_z24_z28_z0: +** fmul {z24\.s - z25\.s}, {z28\.s - z29\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z28_z0, svfloat32x2_t, svfloat32_t, z24, + svmul_single_f32_x2 (z28, z0), + svmul (z28, z0)) + +/* +** mul_single_z24_z1_z0: +** ( +** mov z30\.d, z1\.d +** mov z31\.d, z2\.d +** | +** mov z31\.d, z2\.d +** mov z30\.d, z1\.d +** ) +** fmul {z24\.s - z25\.s}, {z30\.s - z31\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z1_z0, svfloat32x2_t, svfloat32_t, z24, + svmul_single_f32_x2 (z1, z0), + svmul (z1, z0)) + +/* +** mul_single_z1_z24_z0: +** fmul {z30\.s - z31\.s}, {z24\.s - z25\.s}, z0\.s +** ( +** mov z2\.d, z31\.d +** mov z1\.d, z30\.d +** | +** mov z1\.d, z30\.d +** mov z2\.d, z31\.d +** ) +** ret +*/ +TEST_XN_SINGLE (mul_single_z1_z24_z0, svfloat32x2_t, svfloat32_t, z1, + svmul_single_f32_x2 (z24, z0), + svmul (z24, z0)) + +/* +** mul_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fmul ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (mul_single_z1_z1_z0, svfloat32x2_t, svfloat32_t, z1, + svmul_single_f32_x2 (z1, z0), + svmul (z1, z0)) + +/* +** mul_single_z18_z18_z0: +** fmul {z18\.s - z19\.s}, {z18\.s - z19\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (mul_single_z18_z18_z0, svfloat32x2_t, svfloat32_t, z18, + svmul_single_f32_x2 (z18, z0), + svmul (z18, z0)) + +/* +** mul_single_awkward: +** ... +** fmul {z0\.s - z1\.s}, {z30\.s - z31\.s}, z[0-9]+\.s +** ret +*/ +TEST_XN_SINGLE_AWKWARD (mul_single_awkward, svfloat32x2_t, svfloat32_t, + z0_res = svmul_single_f32_x2 (z1, z0), + z0_res = svmul (z1, z0)) + +/* +** mul_single_z0_z0_z15: +** ... +** fmul {z0\.s - z1\.s}, {z0\.s - z1\.s}, z15\.s +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (mul_single_z0_z0_z15, svfloat32x2_t, svfloat32_t, + z0 = svmul_single_f32_x2 (z0, z15), + z0 = svmul (z0, z15)) + +/* +** mul_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmul {z24\.s - z25\.s}, {z24\.s - z25\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z24_z16, svfloat32x2_t, svfloat32_t, z24, + svmul_single_f32_x2 (z24, z16), + svmul (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f32_x4.c new file mode 100644 index 000000000000..c0192848663a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f32_x4.c @@ -0,0 +1,227 @@ +/* { dg-do assemble { target aarch64_asm_sme2p2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme2p2" + +/* +** mul_z0_z0_z4: +** fmul {z0\.s - z3\.s}, {z0\.s - z3\.s}, {z4\.s - z7\.s} +** ret +*/ +TEST_XN (mul_z0_z0_z4, svfloat32x4_t, z0, + svmul_f32_x4 (z0, z4), + svmul (z0, z4)) + +/* +** mul_z0_z4_z0: +** fmul {z0\.s - z3\.s}, {z4\.s - z7\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (mul_z0_z4_z0, svfloat32x4_t, z0, + svmul_f32_x4 (z4, z0), + svmul (z4, z0)) + +/* +** mul_z0_z4_z28: +** fmul {z0\.s - z3\.s}, {z4\.s - z7\.s}, {z28\.s - z31\.s} +** ret +*/ +TEST_XN (mul_z0_z4_z28, svfloat32x4_t, z0, + svmul_f32_x4 (z4, z28), + svmul (z4, z28)) + +/* +** mul_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul [^\n]+, {z4\.s - z7\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (mul_z18_z18_z4, svfloat32x4_t, z18, + svmul_f32_x4 (z18, z4), + svmul (z18, z4)) + +/* +** mul_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul [^\n]+, {z28\.s - z31\.s} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (mul_z23_z23_z28, 
svfloat32x4_t, z23, + svmul_f32_x4 (z23, z28), + svmul (z23, z28)) + +/* +** mul_z28_z28_z0: +** fmul {z28\.s - z31\.s}, {z28\.s - z31\.s}, {z0\.s - z3\.s} +** ret +*/ +TEST_XN (mul_z28_z28_z0, svfloat32x4_t, z28, + svmul_f32_x4 (z28, z0), + svmul (z28, z0)) + +/* +** mul_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** | +** fmul {z0\.s - z3\.s}, {z0\.s - z3\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (mul_z0_z0_z18, svfloat32x4_t, z0, + svmul_f32_x4 (z0, z18), + svmul (z0, z18)) + +/* +** mul_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** | +** fmul {z4\.s - z7\.s}, {z4\.s - z7\.s}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (mul_z4_z4_z23, svfloat32x4_t, z4, + svmul_f32_x4 (z4, z23), + svmul (z4, z23)) + +/* +** mul_single_z24_z24_z0: +** fmul {z24\.s - z27\.s}, {z24\.s - z27\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z24_z0, svfloat32x4_t, svfloat32_t, z24, + svmul_single_f32_x4 (z24, z0), + svmul (z24, z0)) + +/* +** mul_single_z24_z28_z0: +** fmul {z24\.s - z27\.s}, {z28\.s - z31\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z28_z0, svfloat32x4_t, svfloat32_t, z24, + svmul_single_f32_x4 (z28, z0), + svmul (z28, z0)) + +/* +** mul_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul {z24\.s - z27\.s}, {z28\.s - z31\.s}, z0\.s +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z1_z0, svfloat32x4_t, svfloat32_t, z24, + svmul_single_f32_x4 (z1, z0), + svmul (z1, z0)) + +/* +** mul_single_z1_z24_z0: +** fmul {z28\.s - z31\.s}, {z24\.s - z27\.s}, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (mul_single_z1_z24_z0, svfloat32x4_t, svfloat32_t, z1, + svmul_single_f32_x4 (z24, z0), + svmul (z24, z0)) + +/* 
+** mul_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul ({z[0-9]+\.s - z[0-9]+\.s}), \1, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (mul_single_z1_z1_z0, svfloat32x4_t, svfloat32_t, z1, + svmul_single_f32_x4 (z1, z0), + svmul (z1, z0)) + +/* +** mul_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul [^\n]+, z0\.s +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (mul_single_z18_z18_z0, svfloat32x4_t, svfloat32_t, z18, + svmul_single_f32_x4 (z18, z0), + svmul (z18, z0)) + +/* +** mul_single_awkward: +** ... +** fmul {z0\.s - z3\.s}, {z[0-9]+\.s - z[0-9]+\.s}, z[0-9]+\.s +** ret +*/ +TEST_XN_SINGLE_AWKWARD (mul_single_awkward, svfloat32x4_t, svfloat32_t, + z0_res = svmul_single_f32_x4 (z1, z0), + z0_res = svmul (z1, z0)) + +/* +** mul_single_z0_z0_z15: +** ... +** fmul {z0\.s - z3\.s}, {z0\.s - z3\.s}, z15\.s +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (mul_single_z0_z0_z15, svfloat32x4_t, svfloat32_t, + z0 = svmul_single_f32_x4 (z0, z15), + z0 = svmul (z0, z15)) + +/* +** mul_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmul {z24\.s - z27\.s}, {z24\.s - z27\.s}, \1\.s +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z24_z16, svfloat32x4_t, svfloat32_t, z24, + svmul_single_f32_x4 (z24, z16), + svmul (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f64_x2.c new file mode 100644 index 000000000000..8b98f3b5107a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f64_x2.c @@ -0,0 +1,193 @@ +/* { dg-do assemble { target aarch64_asm_sme2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sme2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme2p2" + +/* +** mul_z0_z0_z4: +** fmul {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (mul_z0_z0_z4, svfloat64x2_t, z0, + svmul_f64_x2 (z0, z4), + svmul (z0, z4)) + +/* +** mul_z0_z4_z0: +** fmul {z0\.d - z1\.d}, {z4\.d - z5\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_XN (mul_z0_z4_z0, svfloat64x2_t, z0, + svmul_f64_x2 (z4, z0), + svmul (z4, z0)) + +/* +** mul_z0_z4_z28: +** fmul {z0\.d - z1\.d}, {z4\.d - z5\.d}, {z28\.d - z29\.d} +** ret +*/ +TEST_XN (mul_z0_z4_z28, svfloat64x2_t, z0, + svmul_f64_x2 (z4, z28), + svmul (z4, z28)) + +/* +** mul_z18_z18_z4: +** fmul {z18\.d - z19\.d}, {z18\.d - z19\.d}, {z4\.d - z5\.d} +** ret +*/ +TEST_XN (mul_z18_z18_z4, svfloat64x2_t, z18, + svmul_f64_x2 (z18, z4), + svmul (z18, z4)) + +/* +** mul_z23_z23_z18: +** mov [^\n]+ +** mov [^\n]+ +** fmul [^\n]+, {z18\.d - z19\.d} +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (mul_z23_z23_z18, svfloat64x2_t, z23, + svmul_f64_x2 (z23, z18), + svmul (z23, z18)) + +/* +** mul_z28_z28_z0: +** fmul {z28\.d - z29\.d}, {z28\.d - z29\.d}, {z0\.d - z1\.d} +** ret +*/ +TEST_XN (mul_z28_z28_z0, svfloat64x2_t, z28, + svmul_f64_x2 (z28, z0), + svmul (z28, z0)) + +/* +** mul_z0_z0_z18: +** fmul {z0\.d - z1\.d}, {z0\.d - z1\.d}, {z18\.d - z19\.d} +** ret +*/ +TEST_XN (mul_z0_z0_z18, svfloat64x2_t, z0, + svmul_f64_x2 (z0, z18), + svmul (z0, z18)) + +/* +** mul_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** fmul {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** | +** fmul {z4\.d - z5\.d}, {z4\.d - z5\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (mul_z4_z4_z23, svfloat64x2_t, z4, + svmul_f64_x2 (z4, z23), + svmul (z4, z23)) + +/* +** mul_single_z24_z24_z0: +** fmul {z24\.d - z25\.d}, {z24\.d - z25\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z24_z0, svfloat64x2_t, svfloat64_t, z24, + 
svmul_single_f64_x2 (z24, z0), + svmul (z24, z0)) + +/* +** mul_single_z24_z28_z0: +** fmul {z24\.d - z25\.d}, {z28\.d - z29\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z28_z0, svfloat64x2_t, svfloat64_t, z24, + svmul_single_f64_x2 (z28, z0), + svmul (z28, z0)) + +/* +** mul_single_z24_z1_z0: +** ( +** mov z30\.d, z1\.d +** mov z31\.d, z2\.d +** | +** mov z31\.d, z2\.d +** mov z30\.d, z1\.d +** ) +** fmul {z24\.d - z25\.d}, {z30\.d - z31\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z1_z0, svfloat64x2_t, svfloat64_t, z24, + svmul_single_f64_x2 (z1, z0), + svmul (z1, z0)) + +/* +** mul_single_z1_z24_z0: +** fmul {z30\.d - z31\.d}, {z24\.d - z25\.d}, z0\.d +** ( +** mov z2\.d, z31\.d +** mov z1\.d, z30\.d +** | +** mov z1\.d, z30\.d +** mov z2\.d, z31\.d +** ) +** ret +*/ +TEST_XN_SINGLE (mul_single_z1_z24_z0, svfloat64x2_t, svfloat64_t, z1, + svmul_single_f64_x2 (z24, z0), + svmul (z24, z0)) + +/* +** mul_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** fmul ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (mul_single_z1_z1_z0, svfloat64x2_t, svfloat64_t, z1, + svmul_single_f64_x2 (z1, z0), + svmul (z1, z0)) + +/* +** mul_single_z18_z18_z0: +** fmul {z18\.d - z19\.d}, {z18\.d - z19\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (mul_single_z18_z18_z0, svfloat64x2_t, svfloat64_t, z18, + svmul_single_f64_x2 (z18, z0), + svmul (z18, z0)) + +/* +** mul_single_awkward: +** ... +** fmul {z0\.d - z1\.d}, {z30\.d - z31\.d}, z[0-9]+\.d +** ret +*/ +TEST_XN_SINGLE_AWKWARD (mul_single_awkward, svfloat64x2_t, svfloat64_t, + z0_res = svmul_single_f64_x2 (z1, z0), + z0_res = svmul (z1, z0)) + +/* +** mul_single_z0_z0_z15: +** ... +** fmul {z0\.d - z1\.d}, {z0\.d - z1\.d}, z15\.d +** ... 
+** ret +*/ +TEST_XN_SINGLE_Z15 (mul_single_z0_z0_z15, svfloat64x2_t, svfloat64_t, + z0 = svmul_single_f64_x2 (z0, z15), + z0 = svmul (z0, z15)) + +/* +** mul_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmul {z24\.d - z25\.d}, {z24\.d - z25\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z24_z16, svfloat64x2_t, svfloat64_t, z24, + svmul_single_f64_x2 (z24, z16), + svmul (z24, z16)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f64_x4.c new file mode 100644 index 000000000000..a9754caea0d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mul_f64_x4.c @@ -0,0 +1,227 @@ +/* { dg-do assemble { target aarch64_asm_sme2p2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sme2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme2p2" + +/* +** mul_z0_z0_z4: +** fmul {z0\.d - z3\.d}, {z0\.d - z3\.d}, {z4\.d - z7\.d} +** ret +*/ +TEST_XN (mul_z0_z0_z4, svfloat64x4_t, z0, + svmul_f64_x4 (z0, z4), + svmul (z0, z4)) + +/* +** mul_z0_z4_z0: +** fmul {z0\.d - z3\.d}, {z4\.d - z7\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (mul_z0_z4_z0, svfloat64x4_t, z0, + svmul_f64_x4 (z4, z0), + svmul (z4, z0)) + +/* +** mul_z0_z4_z28: +** fmul {z0\.d - z3\.d}, {z4\.d - z7\.d}, {z28\.d - z31\.d} +** ret +*/ +TEST_XN (mul_z0_z4_z28, svfloat64x4_t, z0, + svmul_f64_x4 (z4, z28), + svmul (z4, z28)) + +/* +** mul_z18_z18_z4: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul [^\n]+, {z4\.d - z7\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (mul_z18_z18_z4, svfloat64x4_t, z18, + svmul_f64_x4 (z18, z4), + svmul (z18, z4)) + +/* +** mul_z23_z23_z28: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul [^\n]+, {z28\.d - z31\.d} +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN (mul_z23_z23_z28, 
svfloat64x4_t, z23, + svmul_f64_x4 (z23, z28), + svmul (z23, z28)) + +/* +** mul_z28_z28_z0: +** fmul {z28\.d - z31\.d}, {z28\.d - z31\.d}, {z0\.d - z3\.d} +** ret +*/ +TEST_XN (mul_z28_z28_z0, svfloat64x4_t, z28, + svmul_f64_x4 (z28, z0), + svmul (z28, z0)) + +/* +** mul_z0_z0_z18: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** | +** fmul {z0\.d - z3\.d}, {z0\.d - z3\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (mul_z0_z0_z18, svfloat64x4_t, z0, + svmul_f64_x4 (z0, z18), + svmul (z0, z18)) + +/* +** mul_z4_z4_z23: +** ( +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** | +** fmul {z4\.d - z7\.d}, {z4\.d - z7\.d}, [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ) +** ret +*/ +TEST_XN (mul_z4_z4_z23, svfloat64x4_t, z4, + svmul_f64_x4 (z4, z23), + svmul (z4, z23)) + +/* +** mul_single_z24_z24_z0: +** fmul {z24\.d - z27\.d}, {z24\.d - z27\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z24_z0, svfloat64x4_t, svfloat64_t, z24, + svmul_single_f64_x4 (z24, z0), + svmul (z24, z0)) + +/* +** mul_single_z24_z28_z0: +** fmul {z24\.d - z27\.d}, {z28\.d - z31\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z28_z0, svfloat64x4_t, svfloat64_t, z24, + svmul_single_f64_x4 (z28, z0), + svmul (z28, z0)) + +/* +** mul_single_z24_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul {z24\.d - z27\.d}, {z28\.d - z31\.d}, z0\.d +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z1_z0, svfloat64x4_t, svfloat64_t, z24, + svmul_single_f64_x4 (z1, z0), + svmul (z1, z0)) + +/* +** mul_single_z1_z24_z0: +** fmul {z28\.d - z31\.d}, {z24\.d - z27\.d}, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (mul_single_z1_z24_z0, svfloat64x4_t, svfloat64_t, z1, + svmul_single_f64_x4 (z24, z0), + svmul (z24, z0)) + +/* 
+** mul_single_z1_z1_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul ({z[0-9]+\.d - z[0-9]+\.d}), \1, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (mul_single_z1_z1_z0, svfloat64x4_t, svfloat64_t, z1, + svmul_single_f64_x4 (z1, z0), + svmul (z1, z0)) + +/* +** mul_single_z18_z18_z0: +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmul [^\n]+, z0\.d +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** ret +*/ +TEST_XN_SINGLE (mul_single_z18_z18_z0, svfloat64x4_t, svfloat64_t, z18, + svmul_single_f64_x4 (z18, z0), + svmul (z18, z0)) + +/* +** mul_single_awkward: +** ... +** fmul {z0\.d - z3\.d}, {z[0-9]+\.d - z[0-9]+\.d}, z[0-9]+\.d +** ret +*/ +TEST_XN_SINGLE_AWKWARD (mul_single_awkward, svfloat64x4_t, svfloat64_t, + z0_res = svmul_single_f64_x4 (z1, z0), + z0_res = svmul (z1, z0)) + +/* +** mul_single_z0_z0_z15: +** ... +** fmul {z0\.d - z3\.d}, {z0\.d - z3\.d}, z15\.d +** ... +** ret +*/ +TEST_XN_SINGLE_Z15 (mul_single_z0_z0_z15, svfloat64x4_t, svfloat64_t, + z0 = svmul_single_f64_x4 (z0, z15), + z0 = svmul (z0, z15)) + +/* +** mul_single_z24_z24_z16: +** mov (z[0-7])\.d, z16\.d +** fmul {z24\.d - z27\.d}, {z24\.d - z27\.d}, \1\.d +** ret +*/ +TEST_XN_SINGLE (mul_single_z24_z24_z16, svfloat64x4_t, svfloat64_t, z24, + svmul_single_f64_x4 (z24, z16), + svmul (z24, z16))
