https://gcc.gnu.org/g:7c7ace08e4aab91ca0ce4bbd318af181da7c460f
commit r17-975-g7c7ace08e4aab91ca0ce4bbd318af181da7c460f
Author: Artemiy Volkov <[email protected]>
Date: Fri Jan 9 19:30:52 2026 +0000

aarch64: add zeroing forms for predicated SVE int-/FP-to-FP conversions

SVE2.2 (or in streaming mode, SME2.2) adds support for zeroing predication
for the following SVE FP conversion instructions:

SVE1:
- SCVTF (Signed integer convert to floating-point (predicated))
- UCVTF (Unsigned integer convert to floating-point (predicated))
- FCVT (Floating-point convert (predicated))
- BFCVT (Single-precision convert to BFloat16 (predicated))

SVE2:
- FCVTX (Double-precision convert to single-precision, rounding to odd
  (predicated))

The SVE1 instructions are spread over several patterns for various
combinations of source/destination widths and FP semantics, and the FCVTX
instruction is serviced by two patterns in the aarch64-sve2.md file via the
SVE2_COND_FP_UNARY_NARROWB iterator (one for strict, the other for relaxed
FP semantics).

The patch adds an alternative that emits a single zeroing-predication
version of an instruction whenever the merge operand is a constant zero
vector and the sve2p2_or_sme2p2 condition holds.

As with the original cvt_b?f* tests in the sve/acle/asm directory,
testcases for conversions from both integral and floating-point types
coexist in the same files and are grouped only by the destination type.
FCVTX tests are added in a separate file.

gcc/ChangeLog:

        * config/aarch64/aarch64-sve.md
        (*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed):
        New alternative for zeroing predication. Add `arch` attribute to every
        alternative.
        (*cond_<optab>_nonextend<SVE_HSDI:mode><SVE_PARTIAL_F:mode>_relaxed):
        Likewise.
        (*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict):
        Likewise.
        (*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>): Likewise.
        (*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>): Likewise.
        (*cond_<optab>_trunc<SVE_SDF:mode><SVE_PARTIAL_HSF:mode>): Likewise.
(*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>): Likewise. (*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>): Likewise. (*cond_<optab>_nontrunc<SVE_PARTIAL_HSF:mode><SVE_SDF:mode>_relaxed): Likewise. * config/aarch64/aarch64-sve2.md (*cond_<sve_fp_op><mode>_any_relaxed): Likewise. (*cond_<sve_fp_op><mode>_any_strict): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve2/acle/asm/cvt_bf16_z.c: New test. * gcc.target/aarch64/sve2/acle/asm/cvt_f16_z.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cvt_f32_z.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cvt_f64_z.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cvtx_f32_z.c: Likewise. Diff: --- gcc/config/aarch64/aarch64-sve.md | 81 ++++++----- gcc/config/aarch64/aarch64-sve2.md | 18 +-- .../gcc.target/aarch64/sve2/acle/asm/cvt_bf16_z.c | 28 ++++ .../gcc.target/aarch64/sve2/acle/asm/cvt_f16_z.c | 160 +++++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/cvt_f32_z.c | 122 ++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/cvt_f64_z.c | 120 ++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/cvtx_f32_z.c | 29 ++++ 7 files changed, 514 insertions(+), 44 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index a4954df9d7af..c7fe14973f95 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -11027,10 +11027,11 @@ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ &w , Upl , w , 0 ; * ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> - [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, 
arch ] + [ &w , Upl , w , 0 ; * , * ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> + [ &w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype> + [ &w , Upl , w , Dz ; yes , * ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> } "&& !rtx_equal_p (operands[1], operands[4])" { @@ -11053,10 +11054,11 @@ UNSPEC_SEL))] "TARGET_SVE && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ &w , Upl , w , 0 ; * ] <su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> - [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_HSDI:Vetype>, %1/z, %2.<SVE_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ &w , Upl , w , 0 ; * , * ] <su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> + [ &w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] <su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/z, %2.<SVE_HSDI:Vetype> + [ &w , Upl , w , Dz ; yes , * ] movprfx\t%0.<SVE_HSDI:Vetype>, %1/z, %2.<SVE_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> } "&& !rtx_equal_p (operands[1], operands[4])" { @@ -11076,10 +11078,11 @@ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ &w , Upl , w , 0 ; * ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> - [ &w , Upl , w , 
Dz ; yes ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ &w , Upl , w , 0 ; * , * ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> + [ &w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype> + [ &w , Upl , w , Dz ; yes , * ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> } [(set_attr "sve_type" "sve_int_cvt")] ) @@ -11111,10 +11114,11 @@ (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ w , Upl , w , 0 ; * ] <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> - [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ w , Upl , w , 0 ; * , * ] <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> + [ w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx4SI_ONLY:Vetype> + [ ?&w , Upl , w , Dz ; yes , * ] movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> } [(set_attr "sve_type" "sve_int_cvt")] ) @@ -11270,10 +11274,11 @@ 
(match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> - [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ w , Upl , w , 0 ; * , * ] fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> + [ w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype> + [ ?&w , Upl , w , Dz ; yes , * ] movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> } [(set_attr "sve_type" "sve_fp_cvt")] ) @@ -11291,10 +11296,11 @@ (match_operand:SVE_PARTIAL_HSF 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype> - [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_SDF:Vetype>, %1/z, %2.<SVE_SDF:Vetype>\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype> - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype> + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ w , Upl , w , 0 ; * , * ] fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype> + [ w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/z, %2.<SVE_SDF:Vetype> + [ ?&w , Upl , w , Dz ; yes , * ] 
movprfx\t%0.<SVE_SDF:Vetype>, %1/z, %2.<SVE_SDF:Vetype>\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype> + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype> } "&& !rtx_equal_p (operands[1], operands[4])" { @@ -11353,10 +11359,11 @@ (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE_BF16" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ w , Upl , w , 0 ; * ] bfcvt\t%0.h, %1/m, %2.s - [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ w , Upl , w , 0 ; * , * ] bfcvt\t%0.h, %1/m, %2.s + [ w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] bfcvt\t%0.h, %1/z, %2.s + [ ?&w , Upl , w , Dz ; yes , * ] movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s } [(set_attr "sve_type" "sve_fp_cvt")] ) @@ -11493,10 +11500,11 @@ (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype> - [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype> - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype> + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ w , Upl , w , 0 ; * , * ] fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype> + [ w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_HSF:Vetype> + [ ?&w , Upl , w , Dz ; yes , * ] movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, 
%2.<SVE_FULL_HSF:Vetype> + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype> } [(set_attr "sve_type" "sve_fp_cvt")] ) @@ -11514,10 +11522,11 @@ (match_operand:SVE_SDF 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype> - [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_SDF:Vetype>, %1/z, %2.<SVE_SDF:Vetype>\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype> - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype> + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ w , Upl , w , 0 ; * , * ] fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype> + [ w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] fcvt\t%0.<SVE_SDF:Vetype>, %1/z, %2.<SVE_PARTIAL_HSF:Vetype> + [ ?&w , Upl , w , Dz ; yes , * ] movprfx\t%0.<SVE_SDF:Vetype>, %1/z, %2.<SVE_SDF:Vetype>\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype> + [ ?&w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype> } "&& !rtx_equal_p (operands[1], operands[4])" { diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index f4e8709cb522..995b08f084cf 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -3596,10 +3596,11 @@ (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ &w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> - [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> - [ &w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> + 
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ &w , Upl , w , 0 ; * , * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> + [ &w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] <sve_fp_op>\t%0.<Vetype>, %1/z, %2.<Vewtype> + [ &w , Upl , w , Dz ; yes , * ] movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> + [ &w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> } "&& !rtx_equal_p (operands[1], operands[4])" { @@ -3620,10 +3621,11 @@ (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ &w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> - [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> - [ &w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx, arch ] + [ &w , Upl , w , 0 ; * , * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> + [ &w , Upl , w , Dz ; * , sve2p2_or_sme2p2 ] <sve_fp_op>\t%0.<Vetype>, %1/z, %2.<Vewtype> + [ &w , Upl , w , Dz ; yes , * ] movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> + [ &w , Upl , w , w ; yes , * ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> } [(set_attr "sve_type" "sve_fp_cvt")] ) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_bf16_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_bf16_z.c new file mode 100644 index 000000000000..b9421e60c91a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_bf16_z.c @@ -0,0 +1,28 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2+bf16" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvt_bf16_f32_z_tied1: +** bfcvt z0\.h, p0/z, z0\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_bf16_f32_z_tied1, svbfloat16_t, svfloat32_t, + z0_res = svcvt_bf16_f32_z (p0, z0), + z0_res = svcvt_bf16_z (p0, z0)) + +/* +** cvt_bf16_f32_z_untied: +** bfcvt z0\.h, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_bf16_f32_z_untied, svbfloat16_t, svfloat32_t, + z0 = svcvt_bf16_f32_z (p0, z4), + z0 = svcvt_bf16_z (p0, z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f16_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f16_z.c new file mode 100644 index 000000000000..7b164b73587d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f16_z.c @@ -0,0 +1,160 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvt_f16_f32_z_tied1: +** fcvt z0\.h, p0/z, z0\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_f32_z_tied1, svfloat16_t, svfloat32_t, + z0_res = svcvt_f16_f32_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_f32_z_untied: +** fcvt z0\.h, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f16_f32_z_untied, svfloat16_t, svfloat32_t, + z0 = svcvt_f16_f32_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_f64_z_tied1: +** fcvt z0\.h, p0/z, z0\.d +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_f64_z_tied1, svfloat16_t, svfloat64_t, + z0_res = svcvt_f16_f64_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_f64_z_untied: +** fcvt z0\.h, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f16_f64_z_untied, svfloat16_t, svfloat64_t, + z0 = svcvt_f16_f64_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_s16_z_tied1: +** mov (z[0-9]+)\.d, z0\.d +** scvtf z0\.h, p0/z, \1\.h +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_s16_z_tied1, svfloat16_t, svint16_t, + z0_res = svcvt_f16_s16_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_s16_z_untied: +** scvtf z0\.h, p0/z, z4\.h +** ret +*/ +TEST_DUAL_Z (cvt_f16_s16_z_untied, svfloat16_t, svint16_t, + z0 = svcvt_f16_s16_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_s32_z_tied1: +** mov (z[0-9]+)\.d, z0\.d +** scvtf z0\.h, p0/z, \1\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_s32_z_tied1, svfloat16_t, svint32_t, + z0_res = svcvt_f16_s32_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_s32_z_untied: +** scvtf z0\.h, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f16_s32_z_untied, svfloat16_t, svint32_t, + z0 = svcvt_f16_s32_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_s64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** scvtf z0\.h, p0/z, \1 +** ret +*/ 
+TEST_DUAL_Z_REV (cvt_f16_s64_z_tied1, svfloat16_t, svint64_t, + z0_res = svcvt_f16_s64_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_s64_z_untied: +** scvtf z0\.h, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f16_s64_z_untied, svfloat16_t, svint64_t, + z0 = svcvt_f16_s64_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_u16_z_tied1: +** mov (z[0-9]+)\.d, z0\.d +** ucvtf z0\.h, p0/z, \1\.h +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_u16_z_tied1, svfloat16_t, svuint16_t, + z0_res = svcvt_f16_u16_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_u16_z_untied: +** ucvtf z0\.h, p0/z, z4\.h +** ret +*/ +TEST_DUAL_Z (cvt_f16_u16_z_untied, svfloat16_t, svuint16_t, + z0 = svcvt_f16_u16_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_u32_z_tied1: +** mov (z[0-9]+)\.d, z0\.d +** ucvtf z0\.h, p0/z, \1\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_u32_z_tied1, svfloat16_t, svuint32_t, + z0_res = svcvt_f16_u32_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_u32_z_untied: +** ucvtf z0\.h, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f16_u32_z_untied, svfloat16_t, svuint32_t, + z0 = svcvt_f16_u32_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) + +/* +** cvt_f16_u64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** ucvtf z0\.h, p0/z, \1 +** ret +*/ +TEST_DUAL_Z_REV (cvt_f16_u64_z_tied1, svfloat16_t, svuint64_t, + z0_res = svcvt_f16_u64_z (p0, z0), + z0_res = svcvt_f16_z (p0, z0)) + +/* +** cvt_f16_u64_z_untied: +** ucvtf z0\.h, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f16_u64_z_untied, svfloat16_t, svuint64_t, + z0 = svcvt_f16_u64_z (p0, z4), + z0 = svcvt_f16_z (p0, z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f32_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f32_z.c new file mode 100644 index 000000000000..2f970f2f61bd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f32_z.c @@ -0,0 +1,122 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvt_f32_f16_z_tied1: +** fcvt z0\.s, p0/z, z0\.h +** ret +*/ +TEST_DUAL_Z_REV (cvt_f32_f16_z_tied1, svfloat32_t, svfloat16_t, + z0_res = svcvt_f32_f16_z (p0, z0), + z0_res = svcvt_f32_z (p0, z0)) + +/* +** cvt_f32_f16_z_untied: +** fcvt z0\.s, p0/z, z4\.h +** ret +*/ +TEST_DUAL_Z (cvt_f32_f16_z_untied, svfloat32_t, svfloat16_t, + z0 = svcvt_f32_f16_z (p0, z4), + z0 = svcvt_f32_z (p0, z4)) + +/* +** cvt_f32_f64_z_tied1: +** fcvt z0\.s, p0/z, z0\.d +** ret +*/ +TEST_DUAL_Z_REV (cvt_f32_f64_z_tied1, svfloat32_t, svfloat64_t, + z0_res = svcvt_f32_f64_z (p0, z0), + z0_res = svcvt_f32_z (p0, z0)) + +/* +** cvt_f32_f64_z_untied: +** fcvt z0\.s, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f32_f64_z_untied, svfloat32_t, svfloat64_t, + z0 = svcvt_f32_f64_z (p0, z4), + z0 = svcvt_f32_z (p0, z4)) + +/* +** cvt_f32_s32_z_tied1: +** mov (z[0-9]+)\.d, z0\.d +** scvtf z0\.s, p0/z, \1\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f32_s32_z_tied1, svfloat32_t, svint32_t, + z0_res = svcvt_f32_s32_z (p0, z0), + z0_res = svcvt_f32_z (p0, z0)) + +/* +** cvt_f32_s32_z_untied: +** scvtf z0\.s, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f32_s32_z_untied, svfloat32_t, svint32_t, + z0 = svcvt_f32_s32_z (p0, z4), + z0 = svcvt_f32_z (p0, z4)) + +/* +** cvt_f32_s64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** scvtf z0\.s, p0/z, \1 +** ret +*/ +TEST_DUAL_Z_REV (cvt_f32_s64_z_tied1, svfloat32_t, svint64_t, + z0_res = svcvt_f32_s64_z (p0, z0), + z0_res = svcvt_f32_z (p0, z0)) + +/* +** cvt_f32_s64_z_untied: +** scvtf z0\.s, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f32_s64_z_untied, svfloat32_t, svint64_t, + z0 = svcvt_f32_s64_z (p0, z4), + z0 = svcvt_f32_z (p0, z4)) + +/* +** cvt_f32_u32_z_tied1: +** mov (z[0-9]+)\.d, z0\.d +** ucvtf z0\.s, p0/z, \1\.s +** ret +*/ 
+TEST_DUAL_Z_REV (cvt_f32_u32_z_tied1, svfloat32_t, svuint32_t, + z0_res = svcvt_f32_u32_z (p0, z0), + z0_res = svcvt_f32_z (p0, z0)) + +/* +** cvt_f32_u32_z_untied: +** ucvtf z0\.s, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f32_u32_z_untied, svfloat32_t, svuint32_t, + z0 = svcvt_f32_u32_z (p0, z4), + z0 = svcvt_f32_z (p0, z4)) + +/* +** cvt_f32_u64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** ucvtf z0\.s, p0/z, \1 +** ret +*/ +TEST_DUAL_Z_REV (cvt_f32_u64_z_tied1, svfloat32_t, svuint64_t, + z0_res = svcvt_f32_u64_z (p0, z0), + z0_res = svcvt_f32_z (p0, z0)) + +/* +** cvt_f32_u64_z_untied: +** ucvtf z0\.s, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f32_u64_z_untied, svfloat32_t, svuint64_t, + z0 = svcvt_f32_u64_z (p0, z4), + z0 = svcvt_f32_z (p0, z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f64_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f64_z.c new file mode 100644 index 000000000000..5a202a1f46fc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_f64_z.c @@ -0,0 +1,120 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvt_f64_f16_z_tied1: +** fcvt z0\.d, p0/z, z0\.h +** ret +*/ +TEST_DUAL_Z_REV (cvt_f64_f16_z_tied1, svfloat64_t, svfloat16_t, + z0_res = svcvt_f64_f16_z (p0, z0), + z0_res = svcvt_f64_z (p0, z0)) + +/* +** cvt_f64_f16_z_untied: +** fcvt z0\.d, p0/z, z4\.h +** ret +*/ +TEST_DUAL_Z (cvt_f64_f16_z_untied, svfloat64_t, svfloat16_t, + z0 = svcvt_f64_f16_z (p0, z4), + z0 = svcvt_f64_z (p0, z4)) + +/* +** cvt_f64_f32_z_tied1: +** fcvt z0\.d, p0/z, z0\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f64_f32_z_tied1, svfloat64_t, svfloat32_t, + z0_res = svcvt_f64_f32_z (p0, z0), + z0_res = svcvt_f64_z (p0, z0)) + +/* +** cvt_f64_f32_z_untied: +** fcvt z0\.d, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f64_f32_z_untied, svfloat64_t, svfloat32_t, + z0 = svcvt_f64_f32_z (p0, z4), + z0 = svcvt_f64_z (p0, z4)) + +/* +** cvt_f64_s32_z_tied1: +** scvtf z0\.d, p0/z, z0\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f64_s32_z_tied1, svfloat64_t, svint32_t, + z0_res = svcvt_f64_s32_z (p0, z0), + z0_res = svcvt_f64_z (p0, z0)) + +/* +** cvt_f64_s32_z_untied: +** scvtf z0\.d, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f64_s32_z_untied, svfloat64_t, svint32_t, + z0 = svcvt_f64_s32_z (p0, z4), + z0 = svcvt_f64_z (p0, z4)) + +/* +** cvt_f64_s64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** scvtf z0\.d, p0/z, \1 +** ret +*/ +TEST_DUAL_Z_REV (cvt_f64_s64_z_tied1, svfloat64_t, svint64_t, + z0_res = svcvt_f64_s64_z (p0, z0), + z0_res = svcvt_f64_z (p0, z0)) + +/* +** cvt_f64_s64_z_untied: +** scvtf z0\.d, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f64_s64_z_untied, svfloat64_t, svint64_t, + z0 = svcvt_f64_s64_z (p0, z4), + z0 = svcvt_f64_z (p0, z4)) + +/* +** cvt_f64_u32_z_tied1: +** ucvtf z0\.d, p0/z, z0\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_f64_u32_z_tied1, svfloat64_t, svuint32_t, + 
z0_res = svcvt_f64_u32_z (p0, z0), + z0_res = svcvt_f64_z (p0, z0)) + +/* +** cvt_f64_u32_z_untied: +** ucvtf z0\.d, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_f64_u32_z_untied, svfloat64_t, svuint32_t, + z0 = svcvt_f64_u32_z (p0, z4), + z0 = svcvt_f64_z (p0, z4)) + +/* +** cvt_f64_u64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** ucvtf z0\.d, p0/z, \1 +** ret +*/ +TEST_DUAL_Z_REV (cvt_f64_u64_z_tied1, svfloat64_t, svuint64_t, + z0_res = svcvt_f64_u64_z (p0, z0), + z0_res = svcvt_f64_z (p0, z0)) + +/* +** cvt_f64_u64_z_untied: +** ucvtf z0\.d, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvt_f64_u64_z_untied, svfloat64_t, svuint64_t, + z0 = svcvt_f64_u64_z (p0, z4), + z0 = svcvt_f64_z (p0, z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtx_f32_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtx_f32_z.c new file mode 100644 index 000000000000..d5d94f227d61 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtx_f32_z.c @@ -0,0 +1,29 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvtx_f32_f64_z_tied1: +** mov (z[0-9]+\.d), z0\.d +** fcvtx z0\.s, p0/z, \1 +** ret +*/ +TEST_DUAL_Z_REV (cvtx_f32_f64_z_tied1, svfloat32_t, svfloat64_t, + z0_res = svcvtx_f32_f64_z (p0, z0), + z0_res = svcvtx_f32_z (p0, z0)) + +/* +** cvtx_f32_f64_z_untied: +** fcvtx z0\.s, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvtx_f32_f64_z_untied, svfloat32_t, svfloat64_t, + z0 = svcvtx_f32_f64_z (p0, z4), + z0 = svcvtx_f32_z (p0, z4))
