> On 2 Feb 2026, at 14:22, Artemiy Volkov <[email protected]> wrote:
>
> SVE2.2 (or in streaming mode, SME2.2) adds support for zeroing
> predication for the following SVE FP conversion instructions:
>
> SVE1:
> - BFCVTNT (Single-precision convert to BFloat16 (top, predicated))
>
> SVE2:
> - FCVTLT (Floating-point widening convert (top, predicated))
> - FCVTNT (Floating-point narrowing convert (top, predicated))
> - FCVTXNT (Double-precision convert to single-precision, rounding
> to odd (top, predicated))
>
> Additionally, this patch implements corresponding intrinsics documented in
> the ACLE manual [0] with the following signatures:
>
> svfloat{32,64}_t svcvtlt_{f32[_f16],_f64[_f32]}_z
> (svbool_t pg, svfloat{16,32}_t op);
>
> sv{bfloat16,float16,float32}_t svcvtnt_{f16[_f32],_f32[_f64],_bf16[_f32]}_z
> (sv{bfloat16,float16,float32}_t even, svbool_t pg, svfloat{32,64}_t op);
>
> svfloat32_t svcvtxnt_f32[_f64]_z
> (svfloat32_t even, svbool_t pg, svfloat64_t op);
>
> This patch adds an alternative that emits a single zeroing-predication
> form of the instructions mentioned above (as long as the sve2p2_or_sme2p2
> condition holds) to corresponding RTL patterns. For narrowing conversions
> ([B]FCVTNT and FCVTXNT), since an additional merge operand controlling the
> values of inactive lanes is required, the intrinsics have been changed to
> use the new narrowing_top_convert SVE function base class; this new class
> injects a const_vector selector operand at expand time. Depending on the
> value of this operand, either the destination vector or a constant zero
> vector is used to supply values for inactive lanes.
>
> The new tests all have "_z" in their names since they only cover the
> zeroing-predication versions of their respective intrinsics.
>
> [0] https://github.com/ARM-software/acle
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-sve-builtins-base.cc (class svcvtnt_impl):
> Remove.
> (svcvtnt): Redefine using narrowing_top_convert.
> * config/aarch64/aarch64-sve-builtins-functions.h
> (class narrowing_top_convert): New SVE function base class.
> (NARROWING_TOP_CONVERT0): New function-like macro for specializing
> narrowing_top_convert.
> (NARROWING_TOP_CONVERT1): Likewise.
> * config/aarch64/aarch64-sve-builtins-sve2.cc (class svcvtxnt_impl):
> Remove.
> (svcvtxnt): Redefine using narrowing_top_convert.
> * config/aarch64/aarch64-sve-builtins-sve2.def (svcvtlt): Allow
> zeroing predication.
> (svcvtnt): Likewise.
> (svcvtxnt): Likewise.
> * config/aarch64/aarch64-sve.md (@aarch64_sve_cvtnt<mode>):
> Convert to compact syntax. Add operand 4 for values of
> inactive lanes. New alternative for zeroing predication.
> * config/aarch64/aarch64-sve2.md
> (*cond_<sve_fp_op><mode>_relaxed): Convert to compact syntax.
> New alternative for zeroing predication.
> (*cond_<sve_fp_op><mode>_strict): Likewise.
> (@aarch64_sve_cvtnt<mode>): Convert to compact syntax. Add
> operand 4 for values of inactive lanes. New alternative for
> zeroing predication.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c: New test.
> * gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/cvtnt_bf16_z.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/cvtxnt_f32_z.c: Likewise.
> ---
> .../aarch64/aarch64-sve-builtins-base.cc | 14 +----
> .../aarch64/aarch64-sve-builtins-functions.h | 41 ++++++++++++++
> .../aarch64/aarch64-sve-builtins-sve2.cc | 14 +----
> .../aarch64/aarch64-sve-builtins-sve2.def | 7 +++
> gcc/config/aarch64/aarch64-sve.md | 16 ++++--
> gcc/config/aarch64/aarch64-sve2.md | 55 ++++++++++++-------
> .../aarch64/sve2/acle/asm/cvtlt_f32_z.c | 28 ++++++++++
> .../aarch64/sve2/acle/asm/cvtlt_f64_z.c | 28 ++++++++++
> .../aarch64/sve2/acle/asm/cvtnt_bf16_z.c | 34 ++++++++++++
> .../aarch64/sve2/acle/asm/cvtnt_f16_z.c | 34 ++++++++++++
> .../aarch64/sve2/acle/asm/cvtnt_f32_z.c | 34 ++++++++++++
> .../aarch64/sve2/acle/asm/cvtxnt_f32_z.c | 34 ++++++++++++
> 12 files changed, 287 insertions(+), 52 deletions(-)
> create mode 100644
> gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c
> create mode 100644
> gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c
> create mode 100644
> gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_bf16_z.c
> create mode 100644
> gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c
> create mode 100644
> gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c
> create mode 100644
> gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtxnt_f32_z.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> index 918642a45c1..f2028c27172 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> @@ -855,18 +855,6 @@ public:
> }
> };
>
> -class svcvtnt_impl : public CODE_FOR_MODE0 (aarch64_sve_cvtnt)
> -{
> -public:
> - gimple *
> - fold (gimple_folder &f) const override
> - {
> - if (f.pred == PRED_x && is_pfalse (gimple_call_arg (f.call, 1)))
> - f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs)));
> - return NULL;
> - }
> -};
> -
> class svdiv_impl : public rtx_code_function
> {
> public:
> @@ -3580,7 +3568,7 @@ FUNCTION (svcreate2, svcreate_impl, (2))
> FUNCTION (svcreate3, svcreate_impl, (3))
> FUNCTION (svcreate4, svcreate_impl, (4))
> FUNCTION (svcvt, svcvt_impl,)
> -FUNCTION (svcvtnt, svcvtnt_impl,)
> +FUNCTION (svcvtnt, NARROWING_TOP_CONVERT0 (aarch64_sve_cvtnt),)
> FUNCTION (svdiv, svdiv_impl,)
> FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
> FUNCTION (svdot, svdot_impl,)
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
> b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
> index f32d2fd54e7..521bea72c25 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
> @@ -916,6 +916,47 @@ public:
> int m_unspec_for_uint;
> };
>
> +template<insn_code (*CODE_FOR_MODE) (machine_mode), unsigned int N>
> +class narrowing_top_convert : public code_for_mode_function <CODE_FOR_MODE,
> N>
> +{
> + using base = code_for_mode_function <CODE_FOR_MODE, N>;
> +
> +public:
> + gimple *
> + fold (gimple_folder &f) const override
> + {
> + if (f.pred == PRED_x && is_pfalse (gimple_call_arg (f.call, 1)))
> + return f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs)));
> + return NULL;
> + }
> +
> + rtx
> + expand (function_expander &e) const override
> + {
> + /* If the instruction is predicated, add a selector argument for the
> + values of inactive lanes, which is equal to all ones for merging
> + predication and to all zeros for zeroing predication. */
> + if (e.pred == PRED_none)
> + ;
> + else if (e.pred == PRED_z)
> + {
> + e.args.quick_push (CONST0_RTX (e.result_mode ()));
> + }
> + else
> + {
> + gcc_assert (e.pred == PRED_m || e.pred == PRED_x);
> + e.args.quick_push (CONST1_RTX (e.result_mode ()));
> + }
> +
> + return base::expand (e);
> + }
> +};
> +
> +#define NARROWING_TOP_CONVERT0(PATTERN)\
> + narrowing_top_convert<code_for_##PATTERN, 0>
> +#define NARROWING_TOP_CONVERT1(PATTERN)\
> + narrowing_top_convert<code_for_##PATTERN, 1>
> +
> }
>
> /* Declare the global function base NAME, creating it from an instance
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> index c4c8bae86b8..86ea2efe5aa 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> @@ -252,18 +252,6 @@ public:
> }
> };
>
> -class svcvtxnt_impl : public CODE_FOR_MODE1 (aarch64_sve2_cvtxnt)
> -{
> -public:
> - gimple *
> - fold (gimple_folder &f) const override
> - {
> - if (f.pred == PRED_x && is_pfalse (gimple_call_arg (f.call, 1)))
> - return f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs)));
> - return NULL;
> - }
> -};
> -
> class svdup_laneq_impl : public function_base
> {
> public:
> @@ -1028,7 +1016,7 @@ FUNCTION (svcvtlt, unspec_based_function, (-1, -1,
> UNSPEC_COND_FCVTLT))
> FUNCTION (svcvtn, svcvtn_impl,)
> FUNCTION (svcvtnb, fixed_insn_function,
> (CODE_FOR_aarch64_sve2_fp8_cvtnbvnx16qi))
> FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX))
> -FUNCTION (svcvtxnt, svcvtxnt_impl,)
> +FUNCTION (svcvtxnt, NARROWING_TOP_CONVERT1 (aarch64_sve2_cvtxnt),)
> FUNCTION (svdup_laneq, svdup_laneq_impl,)
> FUNCTION (sveor3, CODE_FOR_MODE0 (aarch64_sve2_eor3),)
> FUNCTION (sveorbt, unspec_based_function, (UNSPEC_EORBT, UNSPEC_EORBT, -1))
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> index 8058beeb8a8..87f5844641d 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> @@ -300,6 +300,13 @@ DEF_SVE_FUNCTION (svst1q_scatter, store_scatter64_index,
> hsd_data, implicit)
> DEF_SVE_FUNCTION (svst1wq, store, s_data, implicit)
> #undef REQUIRED_EXTENSIONS
>
> +#define REQUIRED_EXTENSIONS sve_and_sme (AARCH64_FL_SVE2p2,
> AARCH64_FL_SME2p2)
> +DEF_SVE_FUNCTION (svcvtlt, unary_convert, cvt_long, z)
> +DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_narrow, z)
> +DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, z)
> +DEF_SVE_FUNCTION (svcvtxnt, unary_convert_narrowt, cvt_narrow_s, z)
> +#undef REQUIRED_EXTENSIONS
> +
> #define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2)
> DEF_SVE_FUNCTION_GS (svadd, binary_single, all_integer, x24, none)
> DEF_SVE_FUNCTION_GS (svclamp, clamp, all_arith, x24, none)
> diff --git a/gcc/config/aarch64/aarch64-sve.md
> b/gcc/config/aarch64/aarch64-sve.md
> index 8742f13b3a8..b6044adb5fb 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -11315,15 +11315,19 @@
> ;;
> ;; This instructions does not take MOVPRFX.
> (define_insn "@aarch64_sve_cvtnt<mode>"
> - [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
> + [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
> (unspec:VNx8BF_ONLY
> - [(match_operand:VNx4BI 2 "register_operand" "Upl")
> + [(match_operand:VNx4BI 2 "register_operand")
> (const_int SVE_STRICT_GP)
> - (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
> - (match_operand:VNx4SF 3 "register_operand" "w")]
> + (match_operand:VNx8BF_ONLY 1 "register_operand")
> + (match_operand:VNx8BF_ONLY 4 "aarch64_constant_vector_operand")
> + (match_operand:VNx4SF 3 "register_operand")]
> UNSPEC_COND_FCVTNT))]
> - "TARGET_SVE_BF16"
> - "bfcvtnt\t%0.h, %2/m, %3.s"
> + "TARGET_SVE_BF16 || TARGET_SVE2p2_OR_SME2p2"
> + {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: arch ]
> + [ w , 0 , Upl , w , vs1 ; * ] bfcvtnt\t%0.h,
> %2/m, %3.s
> + [ w , 0 , Upl , w , Dz ; sve2p2_or_sme2p2 ] bfcvtnt\t%0.h,
> %2/z, %3.s
> + }
> [(set_attr "sve_type" "sve_fp_cvt")]
> )
>
> diff --git a/gcc/config/aarch64/aarch64-sve2.md
> b/gcc/config/aarch64/aarch64-sve2.md
> index cbac0c9a08a..5fd9631dda7 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -3479,18 +3479,21 @@
>
> ;; These instructions do not take MOVPRFX.
> (define_insn_and_rewrite "*cond_<sve_fp_op><mode>_relaxed"
> - [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
> + [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
> (unspec:SVE_FULL_SDF
> - [(match_operand:<VPRED> 1 "register_operand" "Upl")
> + [(match_operand:<VPRED> 1 "register_operand")
> (unspec:SVE_FULL_SDF
> [(match_operand 4)
> (const_int SVE_RELAXED_GP)
> - (match_operand:<VNARROW> 2 "register_operand" "w")]
> + (match_operand:<VNARROW> 2 "register_operand")]
> SVE2_COND_FP_UNARY_LONG)
> - (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
> + (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
> UNSPEC_SEL))]
> "TARGET_SVE2"
> - "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
> + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: arch ]
> + [ w , Upl , w , 0 ; * ]
> <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>
> + [ w , Upl , w , Dz ; sve2p2_or_sme2p2 ]
> <sve_fp_op>\t%0.<Vetype>, %1/z, %2.<Ventype>
If the target is not sve2p2_or_sme2p2 then the predicate allows zero but no
alternative can handle it, which may cause problems down the line.
Should the predicate for operand 3 be tightened to allow zero only in the
sve2p2_or_sme2p2 case?
> + }
> "&& !rtx_equal_p (operands[1], operands[4])"
> {
> operands[4] = copy_rtx (operands[1]);
> @@ -3499,18 +3502,21 @@
> )
>
> (define_insn "*cond_<sve_fp_op><mode>_strict"
> - [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
> + [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
> (unspec:SVE_FULL_SDF
> - [(match_operand:<VPRED> 1 "register_operand" "Upl")
> + [(match_operand:<VPRED> 1 "register_operand")
> (unspec:SVE_FULL_SDF
> [(match_dup 1)
> (const_int SVE_STRICT_GP)
> - (match_operand:<VNARROW> 2 "register_operand" "w")]
> + (match_operand:<VNARROW> 2 "register_operand")]
> SVE2_COND_FP_UNARY_LONG)
> - (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
> + (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
> UNSPEC_SEL))]
> "TARGET_SVE2"
> - "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
> + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: arch ]
> + [ w , Upl , w , 0 ; * ]
> <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>
> + [ w , Upl , w , Dz ; sve2p2_or_sme2p2 ]
> <sve_fp_op>\t%0.<Vetype>, %1/z, %2.<Ventype>
Same here.
Thanks,
Kyrill
> + }
> [(set_attr "sve_type" "sve_fp_cvt")]
> )
>
> @@ -3540,15 +3546,19 @@
> ;;
> ;; These instructions do not take MOVPRFX.
> (define_insn "@aarch64_sve_cvtnt<mode>"
> - [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
> + [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
> (unspec:SVE_FULL_HSF
> - [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
> + [(match_operand:<VWIDE_PRED> 2 "register_operand")
> (const_int SVE_STRICT_GP)
> - (match_operand:SVE_FULL_HSF 1 "register_operand" "0")
> - (match_operand:<VWIDE> 3 "register_operand" "w")]
> + (match_operand:SVE_FULL_HSF 1 "register_operand")
> + (match_operand:SVE_FULL_HSF 4 "aarch64_constant_vector_operand")
> + (match_operand:<VWIDE> 3 "register_operand")]
> UNSPEC_COND_FCVTNT))]
> "TARGET_SVE2"
> - "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
> + {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: arch ]
> + [ w , 0 , Upl , w , vs1 ; * ]
> fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>
> + [ w , 0 , Upl , w , Dz ; sve2p2_or_sme2p2 ]
> fcvtnt\t%0.<Vetype>, %2/z, %3.<Vewtype>
> + }
> [(set_attr "sve_type" "sve_fp_cvt")]
> )
>
> @@ -3636,18 +3646,23 @@
> ;;
> ;; These instructions do not take MOVPRFX.
> (define_insn "@aarch64_sve2_cvtxnt<mode>"
> - [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
> + [(set (match_operand:<VNARROW> 0 "register_operand")
> (unspec:<VNARROW>
> - [(match_operand:<VPRED> 2 "register_operand" "Upl")
> + [(match_operand:<VPRED> 2 "register_operand")
> (const_int SVE_STRICT_GP)
> - (match_operand:<VNARROW> 1 "register_operand" "0")
> - (match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
> + (match_operand:<VNARROW> 1 "register_operand")
> + (match_operand:<VNARROW> 4 "aarch64_constant_vector_operand")
> + (match_operand:VNx2DF_ONLY 3 "register_operand")]
> UNSPEC_COND_FCVTXNT))]
> "TARGET_SVE2"
> - "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
> + {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: arch ]
> + [ w , 0 , Upl , w , vs1 ; * ]
> fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>
> + [ w , 0 , Upl , w , Dz ; sve2p2_or_sme2p2 ]
> fcvtxnt\t%0.<Ventype>, %2/z, %3.<Vetype>
> + }
> [(set_attr "sve_type" "sve_fp_cvt")]
> )
>
> +
> ;; -------------------------------------------------------------------------
> ;; ---- [FP<-FP] Multi-vector widening conversions
> ;; -------------------------------------------------------------------------
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c
> new file mode 100644
> index 00000000000..c7ca18e6386
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c
> @@ -0,0 +1,28 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** cvtlt_f32_f16_z_tied1:
> +** fcvtlt z0\.s, p0/z, z0\.h
> +** ret
> +*/
> +TEST_DUAL_Z_REV (cvtlt_f32_f16_z_tied1, svfloat32_t, svfloat16_t,
> + z0_res = svcvtlt_f32_f16_z (p0, z0),
> + z0_res = svcvtlt_f32_z (p0, z0))
> +
> +/*
> +** cvtlt_f32_f16_z_untied:
> +** fcvtlt z0\.s, p0/z, z4\.h
> +** ret
> +*/
> +TEST_DUAL_Z (cvtlt_f32_f16_z_untied, svfloat32_t, svfloat16_t,
> + z0 = svcvtlt_f32_f16_z (p0, z4),
> + z0 = svcvtlt_f32_z (p0, z4))
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c
> new file mode 100644
> index 00000000000..29229c022fb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c
> @@ -0,0 +1,28 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** cvtlt_f64_f32_z_tied1:
> +** fcvtlt z0\.d, p0/z, z0\.s
> +** ret
> +*/
> +TEST_DUAL_Z_REV (cvtlt_f64_f32_z_tied1, svfloat64_t, svfloat32_t,
> + z0_res = svcvtlt_f64_f32_z (p0, z0),
> + z0_res = svcvtlt_f64_z (p0, z0))
> +
> +/*
> +** cvtlt_f64_f32_z_untied:
> +** fcvtlt z0\.d, p0/z, z4\.s
> +** ret
> +*/
> +TEST_DUAL_Z (cvtlt_f64_f32_z_untied, svfloat64_t, svfloat32_t,
> + z0 = svcvtlt_f64_f32_z (p0, z4),
> + z0 = svcvtlt_f64_z (p0, z4))
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_bf16_z.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_bf16_z.c
> new file mode 100644
> index 00000000000..2f23e49d782
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_bf16_z.c
> @@ -0,0 +1,34 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +
> +#pragma GCC target "+sve2p2+bf16"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** cvtnt_bf16_f32_z_tied1:
> +** bfcvtnt z0\.h, p0/z, z4\.s
> +** ret
> +*/
> +TEST_DUAL_Z (cvtnt_bf16_f32_z_tied1, svbfloat16_t, svfloat32_t,
> + z0 = svcvtnt_bf16_f32_z (z0, p0, z4),
> + z0 = svcvtnt_bf16_z (z0, p0, z4))
> +
> +/*
> +** cvtnt_bf16_f32_z_untied:
> +** (
> +** mov z0\.d, z1\.d
> +** bfcvtnt z0\.h, p0/z, z4\.s
> +** |
> +** bfcvtnt z1\.h, p0/z, z4\.s
> +** mov z0\.d, z1\.d
> +** )
> +** ret
> +*/
> +TEST_DUAL_Z (cvtnt_bf16_f32_z_untied, svbfloat16_t, svfloat32_t,
> + z0 = svcvtnt_bf16_f32_z (z1, p0, z4),
> + z0 = svcvtnt_bf16_z (z1, p0, z4))
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c
> new file mode 100644
> index 00000000000..d05a0006a82
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c
> @@ -0,0 +1,34 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** cvtnt_f16_f32_z_tied1:
> +** fcvtnt z0\.h, p0/z, z4\.s
> +** ret
> +*/
> +TEST_DUAL_Z (cvtnt_f16_f32_z_tied1, svfloat16_t, svfloat32_t,
> + z0 = svcvtnt_f16_f32_z (z0, p0, z4),
> + z0 = svcvtnt_f16_z (z0, p0, z4))
> +
> +/*
> +** cvtnt_f16_f32_z_untied:
> +** (
> +** mov z0\.d, z1\.d
> +** fcvtnt z0\.h, p0/z, z4\.s
> +** |
> +** fcvtnt z1\.h, p0/z, z4\.s
> +** mov z0\.d, z1\.d
> +** )
> +** ret
> +*/
> +TEST_DUAL_Z (cvtnt_f16_f32_z_untied, svfloat16_t, svfloat32_t,
> + z0 = svcvtnt_f16_f32_z (z1, p0, z4),
> + z0 = svcvtnt_f16_z (z1, p0, z4))
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c
> new file mode 100644
> index 00000000000..042cf1c6784
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c
> @@ -0,0 +1,34 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** cvtnt_f32_f64_z_tied1:
> +** fcvtnt z0\.s, p0/z, z4\.d
> +** ret
> +*/
> +TEST_DUAL_Z (cvtnt_f32_f64_z_tied1, svfloat32_t, svfloat64_t,
> + z0 = svcvtnt_f32_f64_z (z0, p0, z4),
> + z0 = svcvtnt_f32_z (z0, p0, z4))
> +
> +/*
> +** cvtnt_f32_f64_z_untied:
> +** (
> +** mov z0\.d, z1\.d
> +** fcvtnt z0\.s, p0/z, z4\.d
> +** |
> +** fcvtnt z1\.s, p0/z, z4\.d
> +** mov z0\.d, z1\.d
> +** )
> +** ret
> +*/
> +TEST_DUAL_Z (cvtnt_f32_f64_z_untied, svfloat32_t, svfloat64_t,
> + z0 = svcvtnt_f32_f64_z (z1, p0, z4),
> + z0 = svcvtnt_f32_z (z1, p0, z4))
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtxnt_f32_z.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtxnt_f32_z.c
> new file mode 100644
> index 00000000000..0f12bca6b55
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtxnt_f32_z.c
> @@ -0,0 +1,34 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** cvtxnt_f32_f64_z_tied1:
> +** fcvtxnt z0\.s, p0/z, z4\.d
> +** ret
> +*/
> +TEST_DUAL_Z (cvtxnt_f32_f64_z_tied1, svfloat32_t, svfloat64_t,
> + z0 = svcvtxnt_f32_f64_z (z0, p0, z4),
> + z0 = svcvtxnt_f32_z (z0, p0, z4))
> +
> +/*
> +** cvtxnt_f32_f64_z_untied:
> +** (
> +** mov z0\.d, z1\.d
> +** fcvtxnt z0\.s, p0/z, z4\.d
> +** |
> +** fcvtxnt z1\.s, p0/z, z4\.d
> +** mov z0\.d, z1\.d
> +** )
> +** ret
> +*/
> +TEST_DUAL_Z (cvtxnt_f32_f64_z_untied, svfloat32_t, svfloat64_t,
> + z0 = svcvtxnt_f32_f64_z (z1, p0, z4),
> + z0 = svcvtxnt_f32_z (z1, p0, z4))
> --
> 2.43.0
>