Re: [AArch64] Handle HFAs of float16 types properly

Richard Earnshaw (lists) Fri, 05 Aug 2016 07:54:40 -0700

On 26/07/16 14:55, James Greenhalgh wrote:
> 
> Hi,
> 
> It looks like we've not been handling structures of 16-bit floating-point
> data correctly for AArch64. For some reason we end up passing them
> packed in to integer registers. That is to say, on trunk and GCC 6, for:
> 
>   struct x {
>     __fp16 x[4];
>   };
> 
>   __fp16
>   foo1 (struct x x)
>   {
>     return x.x[1];
>   }
> 
> We generate:
> 
>   foo1:
>       sbfx    x0, x0, 16, 16
>       mov     v0.h[0], w0
>       ret
> 
> Which is wrong.
> 
> This patch fixes that, so now we generate:
> 
>   foo1:
>       umov    w0, v1.h[0]
>       sxth    x0, w0
>       mov     v0.h[0], w0
>       ret
> 
> Far from optimal (I'll work on that...) but at least getting the data from
> the right register bank!
> 
> To do this we need to keep around a reference to the fp16 type after we
> construct it. I've moved this initialisation to a new function
> aarch64_init_fp16_types in aarch64-builtins.c and made the references
> available through arm_neon.h.
> 
> After that, we want to remove the #if 0 wrapping HFmode support in
> aarch64_gimplify_va_arg_expr in aarch64.c, and add HFmode to the
> REAL_TYPE and COMPLEX_TYPE support in aapcs_vfp_sub_candidate.
> 
> Strictly speaking, we don't need the hunk regarding COMPLEX_TYPE.
> We can't build complex forms of __fp16. But, were we ever to support the
> _Float16 type we'd need this. Rather than leave the chance it will be
> forgotten about, I've just added it here. If the maintainers would prefer,
> I can change this to a TODO and put a sticky-note somewhere near my desk.
> 
> With those simple changes, we fix the argument passing. The rest of the
> patch is an update to the various testcases in aapcs64.exp to fully cover
> various __fp16 cases (both naked, and within an HFA).
> 
> Bootstrapped on aarch64-none-linux-gnu and tested with no issues. Also
> tested on aarch64_be-none-elf. All test came back clean.
> 
> OK? As this is an ABI break, I'm not proposing for it to go back to GCC 6,
> though it will apply cleanly there if the maintainers support that.
>


Can you please file a PR for this and use that when committing.  As
previously discussed, since this was new for 6.1 having a PR makes it
easier if we do decide to have a back-port.

OK on that basis.

R.

> Thanks,
> James
> 
> ---
> 
> gcc/
> 
> 2016-07-26  James Greenhalgh  <james.greenha...@arm.com>
> 
>       * config/aarch64/aarch64.h (aarch64_fp16_type_node): Declare.
>       (aarch64_fp16_ptr_type_node): Likewise.
>       * config/aarch64/aarch64-simd-builtins.c
>       (aarch64_fp16_ptr_type_node): Define.
>       (aarch64_init_fp16_types): New, refactored out of...
>       (aarch64_init_builtins): ...here, update to call
>       aarch64_init_fp16_types.
>       * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Handle
>       HFmode.
>       (aapcs_vfp_sub_candidate): Likewise.
> 
> gcc/testsuite/
> 
> 2016-07-26  James Greenhalgh  <james.greenha...@arm.com>
> 
>       * gcc.target/aarch64/aapcs64/abitest-common.h: Define half-precision
>       registers.
>       * gcc.target/aarch64/aapcs64/abitest.S (dumpregs): Add assembly for
>       saving the half-precision registers.
>       * gcc.target/aarch64/aapcs64/func-ret-1.c: Test that an __fp16
>       value is returned in h0.
>       * gcc.target/aarch64/aapcs64/test_2.c: Check that __FP16 arguments
>       are passed in FP/SIMD registers.
>       * gcc.target/aarch64/aapcs64/test_27.c: New, test that __fp16 HFA
>       passing works corrcetly.
>       * gcc.target/aarch64/aapcs64/type-def.h (hfa_f16x1_t): New.
>       (hfa_f16x2_t): Likewise.
>       (hfa_f16x3_t): Likewise.
>       * gcc.target/aarch64/aapcs64/va_arg-1.c: Check that __fp16 values
>       are promoted to double and passed in a double register.
>       * gcc.target/aarch64/aapcs64/va_arg-2.c: Check that __fp16 values
>       are promoted to double and stacked.
>       * gcc.target/aarch64/aapcs64/va_arg-4.c: Check stacking of HFA of
>       __fp16 data types.
>       * gcc.target/aarch64/aapcs64/va_arg-5.c: Likewise.
>       * gcc.target/aarch64/aapcs64/va_arg-16.c: New, check HFAs of
>       __fp16 first get passed in FP/SIMD registers, then stacked.
> 
> 
> 0001-AArch64-Handle-HFAs-of-float16-types-properly.patch
> 
> 
> diff --git a/gcc/config/aarch64/aarch64-builtins.c 
> b/gcc/config/aarch64/aarch64-builtins.c
> index ca91d91..1de325a 100644
> --- a/gcc/config/aarch64/aarch64-builtins.c
> +++ b/gcc/config/aarch64/aarch64-builtins.c
> @@ -443,13 +443,15 @@ static struct aarch64_simd_type_info aarch64_simd_types 
> [] = {
>  };
>  #undef ENTRY
>  
> -/* This type is not SIMD-specific; it is the user-visible __fp16.  */
> -static tree aarch64_fp16_type_node = NULL_TREE;
> -
>  static tree aarch64_simd_intOI_type_node = NULL_TREE;
>  static tree aarch64_simd_intCI_type_node = NULL_TREE;
>  static tree aarch64_simd_intXI_type_node = NULL_TREE;
>  
> +/* The user-visible __fp16 type, and a pointer to that type.  Used
> +   across the back-end.  */
> +tree aarch64_fp16_type_node = NULL_TREE;
> +tree aarch64_fp16_ptr_type_node = NULL_TREE;
> +
>  static const char *
>  aarch64_mangle_builtin_scalar_type (const_tree type)
>  {
> @@ -883,6 +885,21 @@ aarch64_init_builtin_rsqrt (void)
>    }
>  }
>  
> +/* Initialize the backend types that support the user-visible __fp16
> +   type, also initialize a pointer to that type, to be used when
> +   forming HFAs.  */
> +
> +static void
> +aarch64_init_fp16_types (void)
> +{
> +  aarch64_fp16_type_node = make_node (REAL_TYPE);
> +  TYPE_PRECISION (aarch64_fp16_type_node) = 16;
> +  layout_type (aarch64_fp16_type_node);
> +
> +  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, 
> "__fp16");
> +  aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
> +}
> +
>  void
>  aarch64_init_builtins (void)
>  {
> @@ -904,11 +921,7 @@ aarch64_init_builtins (void)
>      = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
>                           AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, 
> NULL_TREE);
>  
> -  aarch64_fp16_type_node = make_node (REAL_TYPE);
> -  TYPE_PRECISION (aarch64_fp16_type_node) = 16;
> -  layout_type (aarch64_fp16_type_node);
> -
> -  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, 
> "__fp16");
> +  aarch64_init_fp16_types ();
>  
>    if (TARGET_SIMD)
>      aarch64_init_simd_builtins ();
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index fe2683e..addcf2c 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -9888,15 +9888,10 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, 
> gimple_seq *pre_p,
>         field_t = long_double_type_node;
>         field_ptr_t = long_double_ptr_type_node;
>         break;
> -/* The half precision and quad precision are not fully supported yet.  Enable
> -   the following code after the support is complete.  Need to find the 
> correct
> -   type node for __fp16 *.  */
> -#if 0
>       case HFmode:
> -       field_t = float_type_node;
> -       field_ptr_t = float_ptr_type_node;
> +       field_t = aarch64_fp16_type_node;
> +       field_ptr_t = aarch64_fp16_ptr_type_node;
>         break;
> -#endif
>       case V2SImode:
>       case V4SImode:
>           {
> @@ -10058,7 +10053,8 @@ aapcs_vfp_sub_candidate (const_tree type, 
> machine_mode *modep)
>      {
>      case REAL_TYPE:
>        mode = TYPE_MODE (type);
> -      if (mode != DFmode && mode != SFmode && mode != TFmode)
> +      if (mode != DFmode && mode != SFmode
> +       && mode != TFmode && mode != HFmode)
>       return -1;
>  
>        if (*modep == VOIDmode)
> @@ -10071,7 +10067,8 @@ aapcs_vfp_sub_candidate (const_tree type, 
> machine_mode *modep)
>  
>      case COMPLEX_TYPE:
>        mode = TYPE_MODE (TREE_TYPE (type));
> -      if (mode != DFmode && mode != SFmode && mode != TFmode)
> +      if (mode != DFmode && mode != SFmode
> +       && mode != TFmode && mode != HFmode)
>       return -1;
>  
>        if (*modep == VOIDmode)
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 1915980..9e26eb1 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -928,4 +928,9 @@ extern const char *host_detect_local_cpu (int argc, const 
> char **argv);
>  
>  #define ASM_OUTPUT_POOL_EPILOGUE  aarch64_asm_output_pool_epilogue
>  
> +/* This type is the user-visible __fp16, and a pointer to that type.  We
> +   need it in many places in the backend.  Defined in aarch64-builtins.c.  */
> +extern tree aarch64_fp16_type_node;
> +extern tree aarch64_fp16_ptr_type_node;
> +
>  #endif /* GCC_AARCH64_H */
> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest-common.h 
> b/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest-common.h
> index 4e2ef0d..138de73 100644
> --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest-common.h
> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest-common.h
> @@ -57,7 +57,17 @@
>  #define X8     320
>  #define X9     328
>  
> -#define STACK  336
> +#define H0   336
> +#define H1   338
> +#define H2   340
> +#define H3   342
> +#define H4   344
> +#define H5   346
> +#define H6   348
> +#define H7   350
> +
> +
> +#define STACK  352
>  
>  /* The type of test.  'myfunc' in abitest.S needs to know which kind of
>     test it is running to decide what to do at the runtime.  Keep the
> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S 
> b/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S
> index c2fbd83..893e68c 100644
> --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S
> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S
> @@ -13,7 +13,12 @@ dumpregs:
>  myfunc:
>        mov    x16, sp
>        mov    x17, sp
> -      sub    sp,  sp, 352 // 336 for registers and 16 for old sp and lr
> +      sub    sp,  sp, 368 // 352 for registers and 16 for old sp and lr
> +
> +      sub    x17, x17, 8
> +      st4    { v4.h, v5.h, v6.h, v7.h }[0], [x17] //344
> +      sub    x17, x17, 8
> +      st4    { v0.h, v1.h, v2.h, v3.h }[0], [x17] //336
>  
>        stp    x8, x9, [x17, #-16]! //320
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c 
> b/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c
> index a21c926..29a1ca6 100644
> --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c
> @@ -44,4 +44,5 @@ FUNC_VAL_CHECK (12,         vf2_t,        vf2, D0, f32in64)
>  FUNC_VAL_CHECK (13,         vi4_t,        vi4, Q0, i32in128)
>  FUNC_VAL_CHECK (14,         int *,    int_ptr, X0, flat)
>  FUNC_VAL_CHECK (15,         vlf1_t,    vlf1, Q0, flat)
> +FUNC_VAL_CHECK (16,         __fp16,    0xabcd, H0, flat)
>  #endif
> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_2.c 
> b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_2.c
> index 94817ed..ce7c60a8 100644
> --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_2.c
> @@ -12,5 +12,6 @@
>    ARG(double, 4.0, D1)
>    ARG(float, 2.0f, S2)
>    ARG(double, 5.0, D3)
> +  ARG(__fp16, 8.0f, H4)
>    LAST_ARG(int, 3, W0)
>  #endif
> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_27.c 
> b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_27.c
> new file mode 100644
> index 0000000..7bc79f5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_27.c
> @@ -0,0 +1,46 @@
> +/* Test AAPCS64 layout
> +
> +   Test named homogeneous floating-point aggregates of __fp16 data,
> +   which should be passed in SIMD/FP registers or via the stack.  */
> +
> +/* { dg-do run { target aarch64*-*-* } } */
> +
> +#ifndef IN_FRAMEWORK
> +#define TESTFILE "test_27.c"
> +
> +struct x0
> +{
> +  __fp16 v[1];
> +} f16x1;
> +
> +struct x1
> +{
> +  __fp16 v[2];
> +} f16x2;
> +
> +struct x2
> +{
> +  __fp16 v[3];
> +} f16x3;
> +
> +#define HAS_DATA_INIT_FUNC
> +void init_data ()
> +{
> +  f16x1.v[0] = 2.0f;
> +  f16x2.v[0] = 4.0f;
> +  f16x2.v[1] = 8.0f;
> +  f16x3.v[0] = 16.0f;
> +  f16x3.v[1] = 32.0f;
> +  f16x3.v[2] = 64.0f;
> +}
> +
> +#include "abitest.h"
> +#else
> +ARG (struct x0, f16x1, H0)
> +ARG (struct x1, f16x2, H1)
> +ARG (struct x2, f16x3, H3)
> +ARG (struct x1, f16x2, H6)
> +ARG (struct x0, f16x1, STACK)
> +ARG (int, 0xdeadbeef, W0)
> +LAST_ARG (double, 456.789, STACK+8)
> +#endif
> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h 
> b/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h
> index 3b9b349..ca1fa58 100644
> --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h
> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h
> @@ -44,6 +44,24 @@ struct hfa_fx3_t
>    float c;
>  };
>  
> +struct hfa_f16x1_t
> +{
> +  __fp16 a;
> +};
> +
> +struct hfa_f16x2_t
> +{
> +  __fp16 a;
> +  __fp16 b;
> +};
> +
> +struct hfa_f16x3_t
> +{
> +  __fp16 a;
> +  __fp16 b;
> +  __fp16 c;
> +};
> +
>  struct hfa_dx2_t
>  {
>    double a;
> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-1.c 
> b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-1.c
> index 4fb9a03..5b9e057 100644
> --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-1.c
> @@ -19,6 +19,8 @@ signed short ss = 0xcba9;
>  signed int ss_promoted = 0xffffcba9;
>  float fp = 65432.12345f;
>  double fp_promoted = (double)65432.12345f;
> +__fp16 fp16 = 2.0f;
> +__fp16 fp16_promoted = (double)2.0f;
>  
>  #define HAS_DATA_INIT_FUNC
>  void init_data ()
> @@ -46,9 +48,13 @@ void init_data ()
>    ANON         (    long double   , 98765432123456789.987654321L,      Q2,   
>    12)
>    ANON         (             vf2_t, vf2   ,                            D3,   
>    13)
>    ANON         (             vi4_t, vi4   ,                            Q4,   
>    14)
> +  /* 7.2: For unprototyped (i.e. pre- ANSI or K&R C) and variadic functions,
> +     in addition to the normal conversions and promotions, arguments of
> +     type __fp16 are converted to type double.  */
> +  ANON_PROMOTED(            __fp16, fp16  ,     double, fp16_promoted, D5,   
>    15)
>  #ifndef __AAPCS64_BIG_ENDIAN__
> -  LAST_ANON    (         int      , 0xeeee,                            
> STACK+32,15)
> +  LAST_ANON    (         int      , 0xeeee,                            
> STACK+32,16)
>  #else
> -  LAST_ANON    (         int      , 0xeeee,                            
> STACK+36,15)
> +  LAST_ANON    (         int      , 0xeeee,                            
> STACK+36,16)
>  #endif
>  #endif
> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-16.c 
> b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-16.c
> new file mode 100644
> index 0000000..73f8f1c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-16.c
> @@ -0,0 +1,28 @@
> +/* Test AAPCS64 layout and __builtin_va_arg.
> +
> +   This test is focused particularly on __fp16 unnamed homogeneous
> +   floating-point aggregate types which should be passed in fp/simd
> +   registers until we run out of those, then the stack.  */
> +
> +/* { dg-do run { target aarch64*-*-* } } */
> +
> +#ifndef IN_FRAMEWORK
> +#define AAPCS64_TEST_STDARG
> +#define TESTFILE "va_arg-16.c"
> +#include "type-def.h"
> +
> +struct hfa_f16x1_t hfa_f16x1 = {2.0f};
> +struct hfa_f16x2_t hfa_f16x2 = {4.0f, 8.0f};
> +struct hfa_f16x3_t hfa_f16x3 = {16.0f, 32.0f, 64.0f};
> +
> +#include "abitest.h"
> +#else
> +  ARG      (int, 1, W0, LAST_NAMED_ARG_ID)
> +  DOTS
> +  ANON     (struct hfa_f16x1_t, hfa_f16x1, H0     , 0)
> +  ANON     (struct hfa_f16x2_t, hfa_f16x2, H1     , 1)
> +  ANON     (struct hfa_f16x3_t, hfa_f16x3, H3     , 2)
> +  ANON     (struct hfa_f16x2_t, hfa_f16x2, H6     , 3)
> +  ANON     (struct hfa_f16x1_t, hfa_f16x1, STACK  , 4)
> +  LAST_ANON(double            , 1.0      , STACK+8, 5)
> +#endif
> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-2.c 
> b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-2.c
> index e972691..8f2f881 100644
> --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-2.c
> @@ -19,6 +19,8 @@ signed short ss = 0xcba9;
>  signed int ss_promoted = 0xffffcba9;
>  float fp = 65432.12345f;
>  double fp_promoted = (double)65432.12345f;
> +__fp16 fp16 = 2.0f;
> +__fp16 fp16_promoted = (double)2.0f;
>  
>  #define HAS_DATA_INIT_FUNC
>  void init_data ()
> @@ -64,9 +66,10 @@ void init_data ()
>    ANON         (    long double   , 98765432123456789.987654321L,      
> STACK+80, 20)
>    ANON         (             vf2_t, vf2   ,                            
> STACK+96, 21)
>    ANON         (             vi4_t, vi4   ,                            
> STACK+112,22)
> +  ANON_PROMOTED(         __fp16   , fp16  ,     double, fp16_promoted, 
> STACK+128,23)
>  #ifndef __AAPCS64_BIG_ENDIAN__
> -  LAST_ANON    (         int      , 0xeeee,                            
> STACK+128,23)
> +  LAST_ANON    (         int      , 0xeeee,                            
> STACK+136,24)
>  #else
> -  LAST_ANON    (         int      , 0xeeee,                            
> STACK+132,23)
> +  LAST_ANON    (         int      , 0xeeee,                            
> STACK+140,24)
>  #endif
>  #endif
> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-4.c 
> b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-4.c
> index fab3575..010ad8b 100644
> --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-4.c
> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-4.c
> @@ -29,6 +29,8 @@ struct non_hfa_ffvf2_t non_hfa_ffvf2;
>  struct non_hfa_fffd_t non_hfa_fffd = {33.f, 34.f, 35.f, 36.0};
>  union hfa_union_t hfa_union;
>  union non_hfa_union_t non_hfa_union;
> +struct hfa_f16x2_t hfa_f16x2 = {2.0f, 4.0f};
> +struct hfa_f16x3_t hfa_f16x3 = {2.0f, 4.0f, 8.0f};
>  
>  #define HAS_DATA_INIT_FUNC
>  void init_data ()
> @@ -89,9 +91,12 @@ void init_data ()
>    PTR_ANON (struct non_hfa_ffs_t  , non_hfa_ffs  , STACK+120, 18)
>    ANON     (struct non_hfa_ffs_2_t, non_hfa_ffs_2, STACK+128, 19)
>    ANON     (union  non_hfa_union_t, non_hfa_union, STACK+144, 20)
> +  /* HFA of __fp16 passed on stack, directed __fp16 test is va_arg-10.c.  */
> +  ANON     (struct hfa_f16x2_t    , hfa_f16x2    , STACK+152, 21)
> +  ANON     (struct hfa_f16x3_t    , hfa_f16x3    , STACK+160, 22)
>  #ifndef __AAPCS64_BIG_ENDIAN__
> -  LAST_ANON(int                   , 2            , STACK+152, 30)
> +  LAST_ANON(int                   , 2            , STACK+168, 30)
>  #else
> -  LAST_ANON(int                   , 2            , STACK+156, 30)
> +  LAST_ANON(int                   , 2            , STACK+172, 30)
>  #endif
>  #endif
> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-5.c 
> b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-5.c
> index 4853f92..e54f1f5 100644
> --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-5.c
> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-5.c
> @@ -17,6 +17,8 @@ struct hfa_dx4_t hfa_dx4 = {1234.123, 2345.234, 3456.345, 
> 4567.456};
>  struct hfa_ldx3_t hfa_ldx3 = {123456.7890, 234567.8901, 345678.9012};
>  struct hfa_ffs_t hfa_ffs;
>  union hfa_union_t hfa_union;
> +struct hfa_f16x2_t hfa_f16x2 = {2.0f, 4.0f};
> +struct hfa_f16x3_t hfa_f16x3 = {2.0f, 4.0f, 8.0f};
>  
>  #define HAS_DATA_INIT_FUNC
>  void init_data ()
> @@ -43,5 +45,8 @@ void init_data ()
>    ANON     (struct hfa_fx1_t  , hfa_fx1  , STACK+24, 4)
>    ANON     (struct hfa_fx2_t  , hfa_fx2  , STACK+32, 5)
>    ANON     (struct hfa_dx2_t  , hfa_dx2  , STACK+40, 6)
> -  LAST_ANON(double            , 1.0      , STACK+56, 7)
> +  /* HFA of __fp16 passed on stack, directed __fp16 test is va_arg-10.c.  */
> +  ANON     (struct hfa_f16x2_t, hfa_f16x2, STACK+56, 7)
> +  ANON     (struct hfa_f16x3_t, hfa_f16x3, STACK+64, 8)
> +  LAST_ANON(double            , 1.0      , STACK+72, 9)
>  #endif
>

Re: [AArch64] Handle HFAs of float16 types properly

Reply via email to