On 14/07/17 09:20, Thomas Preudhomme wrote: > Hi, > > fp-armv8 is currently defined as a double precision FPv5 with 32 D > registers *and* a special FP_ARMv8 bit. However, FP for ARMv8 should only > bring 32 D registers on top of FPv5-D16 so this FP_ARMv8 bit is > spurious. As a consequence, many instruction patterns which are guarded > by TARGET_FPU_ARMV8 are unavailable to FPv5-D16 and FPv5-SP-D16. > > This patch gets rid of TARGET_FPU_ARMV8 and rewires all uses to > expressions based on TARGET_VFP5, TARGET_VFPD32 and TARGET_VFP_DOUBLE. > It also redefines ISA_FP_ARMv8 to include the D32 capability to > distinguish it from FPv5-D16. Finally, it sets the +fp.sp for ARMv8-R to > enable FPv5-SP-D16 (i.e. FP for ARMv8 with single precision only and 16 D > registers). > > ChangeLog entry is as follows: > > 2017-07-07 Thomas Preud'homme <thomas.preudho...@arm.com> > > * config/arm/arm-isa.h (isa_bit_FP_ARMv8): Delete enumerator. > (ISA_FP_ARMv8): Define as ISA_FPv5 and ISA_FP_D32. > * config/arm/arm-cpus.in (armv8-r): Define fp.sp as enabling FPv5. > (fp-armv8): Define it as FP_ARMv8 only. > * config/arm/arm.h (TARGET_FPU_ARMV8): Delete. > (TARGET_VFP_FP16INST): Define using TARGET_VFP5 rather than > TARGET_FPU_ARMV8. > * config/arm/arm.c (arm_rtx_costs_internal): Replace checks against > TARGET_FPU_ARMV8 by checks against TARGET_VFP5. > * config/arm/arm-builtins.c (arm_builtin_vectorized_function): Define > first ARM_CHECK_BUILTIN_MODE definition using TARGET_VFP5 rather > than TARGET_FPU_ARMV8. > * config/arm/arm-c.c (arm_cpu_builtins): Likewise for > __ARM_FEATURE_NUMERIC_MAXMIN macro definition. > * config/arm/arm.md (cmov<mode>): Condition on TARGET_VFP5 rather than > TARGET_FPU_ARMV8. > * config/arm/neon.md (neon_vrint): Likewise. > (neon_vcvt): Likewise. > (neon_<fmaxmin_op><mode>): Likewise. > (<fmaxmin><mode>3): Likewise. > * config/arm/vfp.md (l<vrint_pattern><su_optab><mode>si2): Likewise. 
> * config/arm/predicates.md (arm_cond_move_operator): Check against > TARGET_VFP5 rather than TARGET_FPU_ARMV8 and fix spacing. > > Testing: > * Bootstrapped under ARMv8-A Thumb state and ran testsuite -> no > regression > * built Spec2000 and Spec2006 with -march=armv8-a+fp16 and compared > objdump -> no code generation difference > > Is this ok for trunk?
OK with changes mentioned below. R. > > Best regards, > > Thomas > > rewire_mfpu_fparmv8.patch > > > diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c > index > 63ee880822c17eda55dd58438d61cbbba333b2c6..7504ed581c63a657a0dff48442633704bd252b2e > 100644 > --- a/gcc/config/arm/arm-builtins.c > +++ b/gcc/config/arm/arm-builtins.c > @@ -3098,7 +3098,7 @@ arm_builtin_vectorized_function (unsigned int fn, tree > type_out, tree type_in) > NULL_TREE is returned if no such builtin is available. */ > #undef ARM_CHECK_BUILTIN_MODE > #define ARM_CHECK_BUILTIN_MODE(C) \ > - (TARGET_FPU_ARMV8 \ > + (TARGET_VFP5 \ > && flag_unsafe_math_optimizations \ > && ARM_CHECK_BUILTIN_MODE_1 (C)) > > diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c > index > a3daa3220a2bc4220dffdb7ca08ca9419bdac425..9178937b6d9e0fe5d0948701390c4cf01f4f8c7d > 100644 > --- a/gcc/config/arm/arm-c.c > +++ b/gcc/config/arm/arm-c.c > @@ -96,7 +96,7 @@ arm_cpu_builtins (struct cpp_reader* pfile) > || TARGET_ARM_ARCH_ISA_THUMB >=2)); > > def_or_undef_macro (pfile, "__ARM_FEATURE_NUMERIC_MAXMIN", > - TARGET_ARM_ARCH >= 8 && TARGET_NEON && TARGET_FPU_ARMV8); > + TARGET_ARM_ARCH >= 8 && TARGET_NEON && TARGET_VFP5); This looks wrong (though ACLE is misleading). The MAXMIN property is solely defined by having an FPv5 capable FPU. 
> > def_or_undef_macro (pfile, "__ARM_FEATURE_SIMD32", TARGET_INT_SIMD); > > diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in > index > f35128acb7d68c6a0592355b9d3d56ee8f826aca..e2ff297aed7514073dbb3bf5ee86964f202e5a14 > 100644 > --- a/gcc/config/arm/arm-cpus.in > +++ b/gcc/config/arm/arm-cpus.in > @@ -389,7 +389,7 @@ begin arch armv8-r > option crc add bit_crc32 > # fp.sp => fp-armv8 (d16); simd => simd + fp-armv8 + d32 + double precision Please update comment > # note: no fp option for fp-armv8 (d16) + double precision at the moment > - option fp.sp add FP_ARMv8 > + option fp.sp add FPv5 > option simd add FP_ARMv8 NEON > option crypto add FP_ARMv8 CRYPTO > option nocrypto remove ALL_CRYPTO > @@ -1390,7 +1390,7 @@ begin fpu fpv5-d16 > end fpu fpv5-d16 > > begin fpu fp-armv8 > - isa FP_ARMv8 FP_D32 > + isa FP_ARMv8 > end fpu fp-armv8 > > begin fpu neon-fp-armv8 > diff --git a/gcc/config/arm/arm-isa.h b/gcc/config/arm/arm-isa.h > index > 0d66a0400c517668db023fc66ff43e26d43add51..dbd29eaa52f2007498c2aff6263b8b6c3a70e2c2 > 100644 > --- a/gcc/config/arm/arm-isa.h > +++ b/gcc/config/arm/arm-isa.h > @@ -60,7 +60,6 @@ enum isa_feature > isa_bit_VFPv4, /* Vector floating point v4. */ > isa_bit_FPv5, /* Floating point v5. */ > isa_bit_lpae, /* ARMv7-A LPAE. */ > - isa_bit_FP_ARMv8, /* ARMv8 floating-point extension. */ > isa_bit_neon, /* Advanced SIMD instructions. */ > isa_bit_fp16conv, /* Conversions to/from fp16 (VFPv3 extension). > */ > isa_bit_fp_dbl, /* Double precision operations supported. */ > @@ -143,7 +142,7 @@ enum isa_feature > default. isa_bit_fp16 is deliberately missing from this list. */ > #define ISA_ALL_FPU_INTERNAL \ > isa_bit_VFPv2, isa_bit_VFPv3, isa_bit_VFPv4, isa_bit_FPv5, \ > - isa_bit_FP_ARMv8, isa_bit_fp16conv, isa_bit_fp_dbl, ISA_ALL_SIMD > + isa_bit_fp16conv, isa_bit_fp_dbl, ISA_ALL_SIMD > > /* Similarly, but including fp16 and other extensions that aren't part of > -mfpu support. 
*/ > @@ -154,10 +153,10 @@ enum isa_feature > #define ISA_VFPv3 ISA_VFPv2, isa_bit_VFPv3 > #define ISA_VFPv4 ISA_VFPv3, isa_bit_VFPv4, isa_bit_fp16conv > #define ISA_FPv5 ISA_VFPv4, isa_bit_FPv5 > -#define ISA_FP_ARMv8 ISA_FPv5, isa_bit_FP_ARMv8 > > #define ISA_FP_DBL isa_bit_fp_dbl > #define ISA_FP_D32 ISA_FP_DBL, isa_bit_fp_d32 > +#define ISA_FP_ARMv8 ISA_FPv5, ISA_FP_D32 > #define ISA_NEON ISA_FP_D32, isa_bit_neon > #define ISA_CRYPTO ISA_NEON, isa_bit_crypto > > diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h > index > 315622212a5ce10d0c771535fe31f63c3be16444..4f53583cf0219de4329bc64a47a5a42c550ff354 > 100644 > --- a/gcc/config/arm/arm.h > +++ b/gcc/config/arm/arm.h > @@ -196,10 +196,6 @@ extern tree arm_fp16_type_node; > /* FPU supports fused-multiply-add operations. */ > #define TARGET_FMA (bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv4)) > > -/* FPU is ARMv8 compatible. */ > -#define TARGET_FPU_ARMV8 \ > - (bitmap_bit_p (arm_active_target.isa, isa_bit_FP_ARMv8)) > - > /* FPU supports Crypto extensions. */ > #define TARGET_CRYPTO (bitmap_bit_p (arm_active_target.isa, isa_bit_crypto)) > > @@ -216,7 +212,7 @@ extern tree arm_fp16_type_node; > > /* FPU supports the floating point FP16 instructions for ARMv8.2 and later. > */ > #define TARGET_VFP_FP16INST \ > - (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 && arm_fp16_inst) > + (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP5 && arm_fp16_inst) > > /* FPU supports the AdvSIMD FP16 instructions for ARMv8.2 and later. 
*/ > #define TARGET_NEON_FP16INST (TARGET_VFP_FP16INST && TARGET_NEON_RDMA) > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index > c6101efd555996a4c6db5eaea0130b0940c4cff8..f59132c3f079d10d9e3d920b61037db2f3144eee > 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -10755,7 +10755,7 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, > enum rtx_code outer_code, > { > if (speed_p) > *cost += extra_cost->fp[mode == DFmode].widen; > - if (!TARGET_FPU_ARMV8 > + if (!TARGET_VFP5 > && GET_MODE (XEXP (x, 0)) == HFmode) > { > /* Pre v8, widening HF->DF is a two-step process, first > @@ -10849,7 +10849,7 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, > enum rtx_code outer_code, > return true; > } > else if (GET_MODE_CLASS (mode) == MODE_FLOAT > - && TARGET_FPU_ARMV8) > + && TARGET_VFP5) > { > if (speed_p) > *cost += extra_cost->fp[mode == DFmode].roundint; > diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md > index > e6e1ac54a850c35807d683804f5294fbef1487ad..049a78edefe9f85c6f84a4ecf0158d559e1d5674 > 100644 > --- a/gcc/config/arm/arm.md > +++ b/gcc/config/arm/arm.md > @@ -7879,7 +7879,7 @@ > "<F_constraint>") > (match_operand:SDF 4 "s_register_operand" > "<F_constraint>")))] > - "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>" > + "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>" > "* > { > enum arm_cond_code code = maybe_get_arm_condition_code (operands[1]); > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md > index > 33b25ff3c730544b4376bf318400d703c8813a0a..235c46da1a19712e2924d748545474ed991d9f92 > 100644 > --- a/gcc/config/arm/neon.md > +++ b/gcc/config/arm/neon.md > @@ -751,7 +751,7 @@ > (unspec:VCVTF [(match_operand:VCVTF 1 > "s_register_operand" "w")] > NEON_VRINT))] > - "TARGET_NEON && TARGET_FPU_ARMV8" > + "TARGET_NEON && TARGET_VFP5" > "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1" > [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")] > ) > @@ -761,7 +761,7 @@ > (FIXUORS:<V_cmp_result> 
(unspec:VCVTF > [(match_operand:VCVTF 1 "register_operand" "w")] > NEON_VCVT)))] > - "TARGET_NEON && TARGET_FPU_ARMV8" > + "TARGET_NEON && TARGET_VFP5" > "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1" > [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>") > (set_attr "predicable" "no")] > @@ -2901,7 +2901,7 @@ > (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") > (match_operand:VCVTF 2 "s_register_operand" "w")] > VMAXMINFNM))] > - "TARGET_NEON && TARGET_FPU_ARMV8" > + "TARGET_NEON && TARGET_VFP5" > "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" > [(set_attr "type" "neon_fp_minmax_s<q>")] > ) > @@ -2912,7 +2912,7 @@ > (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") > (match_operand:VCVTF 2 "s_register_operand" "w")] > VMAXMINFNM))] > - "TARGET_NEON && TARGET_FPU_ARMV8" > + "TARGET_NEON && TARGET_VFP5" > "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" > [(set_attr "type" "neon_fp_minmax_s<q>")] > ) > diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md > index > afb5d6339a8af362384c93bbb46928635073b74b..3e25cd16b29231d53b4cadce3db0fbb3168cd4c5 > 100644 > --- a/gcc/config/arm/predicates.md > +++ b/gcc/config/arm/predicates.md > @@ -350,9 +350,9 @@ > > (define_special_predicate "arm_cond_move_operator" > (if_then_else (match_test "arm_restrict_it") > - (and (match_test "TARGET_FPU_ARMV8") > - (match_operand 0 "arm_vsel_comparison_operator")) > - (match_operand 0 "expandable_comparison_operator"))) > + (and (match_test "TARGET_VFP5") > + (match_operand 0 "arm_vsel_comparison_operator")) > + (match_operand 0 "expandable_comparison_operator"))) > > (define_special_predicate "noov_comparison_operator" > (match_code "lt,ge,eq,ne")) > diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md > index > d8f77e2ffe4fdb7c952d6a5ac947d91f89ce259d..23c1d67c9e3707e64a4e206dc62727e4c79ba89c > 100644 > --- a/gcc/config/arm/vfp.md > +++ b/gcc/config/arm/vfp.md > @@ -1997,7 +1997,7 @@ > (FIXUORS:SI 
(unspec:SDF > [(match_operand:SDF 1 > "register_operand" "<F_constraint>")] VCVT)))] > - "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>" > + "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>" > "vcvt<vrint_variant>.<su>32.<V_if_elem>\\t%0, %<V_reg>1" > [(set_attr "predicable" "no") > (set_attr "conds" "unconditional") >