Hi, This patch implements support for the ARM ACLE Coprocessor CDP intrinsics. The table below maps the intrinsics to their respective instructions:
+----------------------------------------------------+--------------------------------------+ | Intrinsic signature | Instruction pattern | +----------------------------------------------------+--------------------------------------+ |void __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) |CDP coproc, opc1, CRd, CRn, CRm, opc2 | +----------------------------------------------------+--------------------------------------+ |void __arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2) |CDP2 coproc, opc1, CRd, CRn, CRm, opc2| +----------------------------------------------------+--------------------------------------+ Note that any untyped variable in the intrinsic signature is required to be a compile-time constant and has the type 'unsigned int'. We do some boundary checks for coproc:[0-15], opc1:[0-15], CR*:[0-31], opc2:[0-7]. If any of these requirements is not met, a diagnostic is issued. In this patch I renamed neon_const_bounds to arm_const_bounds, simply because it is now also used by the Coprocessor intrinsics. The patch also requires extending the builtin framework so that it accepts 'void' modes and intrinsics with 6 arguments. I also changed acle.exp to run tests for multiple options, where all LTO option sets have -ffat-lto-objects appended to allow for assembly scans. Is this OK for trunk? Regards, Andre gcc/ChangeLog: 2016-11-09 Andre Vieira <andre.simoesdiasvie...@arm.com> * config/arm/arm.md (<cdp>): New. * config/arm/arm.c (neon_const_bounds): Rename this ... (arm_const_bounds): ... this. (arm_coproc_builtin_available): New. * config/arm/arm-builtins.c (SIMD_MAX_BUILTIN_ARGS): Increase. (arm_type_qualifiers): Add 'qualifier_unsigned_immediate'. (CDP_QUALIFIERS): Define to... (arm_cdp_qualifiers): ... this. New. (void_UP): Define. (arm_expand_builtin_args): Add case for 6 arguments. * config/arm/arm-protos.h (neon_const_bounds): Rename this ... (arm_const_bounds): ... this. (arm_coproc_builtin_available): New. * config/arm/arm_acle.h (__arm_cdp): New. 
(__arm_cdp2): New. * config/arm/arm_acle_builtins.def (cdp): New. (cdp2): New. * config/arm/iterators.md (CDPI,CDP,cdp): New. * config/arm/neon.md: Rename all 'neon_const_bounds' to 'arm_const_bounds'. * config/arm/types.md (coproc): New. * config/arm/unspecs.md (VUNSPEC_CDP, VUNSPEC_CDP2): New. * gcc/doc/extend.texi (ACLE): Add a mention of Coprocessor intrinsics. gcc/testsuite/ChangeLog: 2016-11-09 Andre Vieira <andre.simoesdiasvie...@arm.com> * gcc.target/arm/acle/acle.exp: Run tests for different options and make sure fat-lto-objects is used so that we can still do assembly scans. * gcc.target/arm/acle/cdp.c: New. * gcc.target/arm/acle/cdp2.c: New. * lib/target-supports.exp (check_effective_target_arm_coproc1_ok): New. (check_effective_target_arm_coproc1_ok_nocache): New. (check_effective_target_arm_coproc2_ok): New. (check_effective_target_arm_coproc2_ok_nocache): New. (check_effective_target_arm_coproc3_ok): New. (check_effective_target_arm_coproc3_ok_nocache): New.
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index 2130a3004f17c47be6e42412c1ea30f3cff20573..bdb8aad8658af089b4977373654bb2d2c0b5c653 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -38,7 +38,7 @@ #include "langhooks.h" #include "case-cfn-macros.h" -#define SIMD_MAX_BUILTIN_ARGS 5 +#define SIMD_MAX_BUILTIN_ARGS 7 enum arm_type_qualifiers { @@ -53,6 +53,7 @@ enum arm_type_qualifiers /* Used when expanding arguments if an operand could be an immediate. */ qualifier_immediate = 0x8, /* 1 << 3 */ + qualifier_unsigned_immediate = 0x9, qualifier_maybe_immediate = 0x10, /* 1 << 4 */ /* void foo (...). */ qualifier_void = 0x20, /* 1 << 5 */ @@ -164,6 +165,18 @@ arm_unsigned_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS] qualifier_unsigned }; #define UBINOP_QUALIFIERS (arm_unsigned_binop_qualifiers) +/* void (unsigned immediate, unsigned immediate, unsigned immediate, + unsigned immediate, unsigned immediate, unsigned immediate). */ +static enum arm_type_qualifiers +arm_cdp_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned_immediate, + qualifier_unsigned_immediate, + qualifier_unsigned_immediate, + qualifier_unsigned_immediate, + qualifier_unsigned_immediate, + qualifier_unsigned_immediate }; +#define CDP_QUALIFIERS \ + (arm_cdp_qualifiers) /* The first argument (return type) of a store should be void type, which we represent with qualifier_void. 
Their first operand will be a DImode pointer to the location to store to, so we must use @@ -200,6 +213,7 @@ arm_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define oi_UP OImode #define hf_UP HFmode #define si_UP SImode +#define void_UP VOIDmode #define UP(X) X##_UP @@ -2212,6 +2226,10 @@ constant_arg: pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]); break; + case 6: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]); + break; + default: gcc_unreachable (); } @@ -2238,6 +2256,10 @@ constant_arg: pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); break; + case 6: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]); + break; + default: gcc_unreachable (); } diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index fd8b6d106d13faebedf7a3539d30159c14f061c6..1753db00a21b8f844517dfe664d3f9981a3fc33e 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -91,7 +91,7 @@ extern rtx neon_make_constant (rtx); extern tree arm_builtin_vectorized_function (unsigned int, tree, tree); extern void neon_expand_vector_init (rtx, rtx); extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree); -extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); +extern void arm_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); extern HOST_WIDE_INT neon_element_bits (machine_mode); extern void neon_emit_pair_result_insn (machine_mode, rtx (*) (rtx, rtx, rtx, rtx), @@ -170,6 +170,7 @@ extern void arm_expand_compare_and_swap (rtx op[]); extern void arm_split_compare_and_swap (rtx op[]); extern void arm_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); extern rtx arm_load_tp (rtx); +extern bool arm_coproc_builtin_available (enum unspecv); #if defined TREE_CODE extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 
022c1d72a1272e56397dc7e2018483e77f18b90d..71dae5fc76a0840791d43bb408af03660a917dbe 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -13031,7 +13031,7 @@ neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, /* Bounds-check constants. */ void -neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) +arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) { bounds_check (operand, low, high, NULL_TREE, "constant"); } @@ -30784,4 +30784,33 @@ arm_elf_section_type_flags (tree decl, const char *name, int reloc) return flags; } +/* This function checks for the availability of the coprocessor builtin passed + in BUILTIN for the current target. Returns true if it is available and + false otherwise. If a BUILTIN is passed for which this function has not + been implemented it will cause an exception. */ + +bool arm_coproc_builtin_available (enum unspecv builtin) +{ + /* None of these builtins are available in Thumb mode if the target only + supports Thumb-1. */ + if (TARGET_THUMB1) + return false; + + switch (builtin) + { + case VUNSPEC_CDP: + if (arm_arch4) + return true; + break; + case VUNSPEC_CDP2: + /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and + ARMv8-{A,M}. 
*/ + if (arm_arch5) + return true; + break; + default: + gcc_unreachable (); + } + return false; +} #include "gt-arm.h" diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 8393f65bcf4c9c3e61b91e5adcd5f59ff7c6ec3f..f52a657dbf7882a45b03b2b35e42e7a22e7a7a93 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -11838,6 +11838,26 @@ DONE; }) +(define_insn "<cdp>" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand") + (match_operand:SI 1 "immediate_operand") + (match_operand:SI 2 "immediate_operand") + (match_operand:SI 3 "immediate_operand") + (match_operand:SI 4 "immediate_operand") + (match_operand:SI 5 "immediate_operand")] CDPI)] + "arm_coproc_builtin_available (VUNSPEC_<CDP>)" +{ + arm_const_bounds (operands[0], 0, 16); + arm_const_bounds (operands[1], 0, 16); + arm_const_bounds (operands[2], 0, (1 << 5)); + arm_const_bounds (operands[3], 0, (1 << 5)); + arm_const_bounds (operands[4], 0, (1 << 5)); + arm_const_bounds (operands[5], 0, 8); + return "<cdp>\\tp%c0, %1, CR%c2, CR%c3, CR%c4, %5"; +} + [(set_attr "length" "4") + (set_attr "type" "coproc")]) + ;; Vector bits common to IWMMXT and Neon (include "vec-common.md") ;; Load the Intel Wireless Multimedia Extension patterns diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h index 5d937168e10499d7a926495d668efd2bc4a72f79..747a07ced6c7aa2dbf606ca48d4637847add12c6 100644 --- a/gcc/config/arm/arm_acle.h +++ b/gcc/config/arm/arm_acle.h @@ -32,6 +32,26 @@ extern "C" { #endif +#if (!__thumb__ || __thumb2__) && __ARM_ARCH >= 4 +__extension__ static __inline void __attribute__ ((__always_inline__)) +__arm_cdp (const unsigned int __coproc, const unsigned int __opc1, + const unsigned int __CRd, const unsigned int __CRn, + const unsigned int __CRm, const unsigned int __opc2) +{ + return __builtin_arm_cdp (__coproc, __opc1, __CRd, __CRn, __CRm, __opc2); +} + +#if __ARM_ARCH >= 5 +__extension__ static __inline void __attribute__ ((__always_inline__)) +__arm_cdp2 (const unsigned 
int __coproc, const unsigned int __opc1, + const unsigned int __CRd, const unsigned int __CRn, + const unsigned int __CRm, const unsigned int __opc2) +{ + return __builtin_arm_cdp2 (__coproc, __opc1, __CRd, __CRn, __CRm, __opc2); +} +#endif /* __ARM_ARCH >= 5. */ +#endif /* (!__thumb__ || __thumb2__) && __ARM_ARCH >= 4. */ + #ifdef __ARM_FEATURE_CRC32 __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) __crc32b (uint32_t __a, uint8_t __b) diff --git a/gcc/config/arm/arm_acle_builtins.def b/gcc/config/arm/arm_acle_builtins.def index 81ab7720971ba042a5d64c22b6bd19710147e602..03b5bf88ef2632bceedba1e64c0f83bc50337364 100644 --- a/gcc/config/arm/arm_acle_builtins.def +++ b/gcc/config/arm/arm_acle_builtins.def @@ -24,3 +24,5 @@ VAR1 (UBINOP, crc32w, si) VAR1 (UBINOP, crc32cb, si) VAR1 (UBINOP, crc32ch, si) VAR1 (UBINOP, crc32cw, si) +VAR1 (CDP, cdp, void) +VAR1 (CDP, cdp2, void) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 82ba08eb70edb007a068a42a6a53438fdda15aac..999cfe0cf14eb215047b30de760b9c31b76bcf52 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -943,3 +943,8 @@ ;; Attributes for VFMA_LANE/ VFMS_LANE (define_int_attr neon_vfm_lane_as [(UNSPEC_VFMA_LANE "a") (UNSPEC_VFMS_LANE "s")]) + +;; An iterator for the CDP coprocessor instructions +(define_int_iterator CDPI [VUNSPEC_CDP VUNSPEC_CDP2]) +(define_int_attr cdp [(VUNSPEC_CDP "cdp") (VUNSPEC_CDP2 "cdp2")]) +(define_int_attr CDP [(VUNSPEC_CDP "CDP") (VUNSPEC_CDP2 "CDP2")]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 59316de004107913c1db0951ced4d584999fc099..153c3f3f23b27c8c6ed0b0a4afa3c16c9c965085 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -3643,7 +3643,7 @@ if (BYTES_BIG_ENDIAN) VCVT_US_N))] "TARGET_NEON" { - neon_const_bounds (operands[2], 1, 33); + arm_const_bounds (operands[2], 1, 33); return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2"; } [(set_attr "type" 
"neon_fp_to_int_<V_elem_ch><q>")] @@ -3657,7 +3657,7 @@ if (BYTES_BIG_ENDIAN) VCVT_US_N))] "TARGET_NEON_FP16INST" { - neon_const_bounds (operands[2], 0, 17); + arm_const_bounds (operands[2], 0, 17); return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2"; } [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")] @@ -3670,7 +3670,7 @@ if (BYTES_BIG_ENDIAN) VCVT_US_N))] "TARGET_NEON" { - neon_const_bounds (operands[2], 1, 33); + arm_const_bounds (operands[2], 1, 33); return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2"; } [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] @@ -3684,7 +3684,7 @@ if (BYTES_BIG_ENDIAN) VCVT_US_N))] "TARGET_NEON_FP16INST" { - neon_const_bounds (operands[2], 0, 17); + arm_const_bounds (operands[2], 0, 17); return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2"; } [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")] @@ -4289,7 +4289,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VEXT))] "TARGET_NEON" { - neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); + arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3"; } [(set_attr "type" "neon_ext<q>")] @@ -4386,7 +4386,7 @@ if (BYTES_BIG_ENDIAN) VSHR_N))] "TARGET_NEON" { - neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1); + arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1); return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"; } [(set_attr "type" "neon_shift_imm<q>")] @@ -4400,7 +4400,7 @@ if (BYTES_BIG_ENDIAN) VSHRN_N))] "TARGET_NEON" { - neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); + arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2"; } [(set_attr "type" "neon_shift_imm_narrow_q")] @@ -4414,7 +4414,7 @@ if (BYTES_BIG_ENDIAN) VQSHRN_N))] "TARGET_NEON" { - neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); + arm_const_bounds (operands[2], 1, 
neon_element_bits (<MODE>mode) / 2 + 1); return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2"; } [(set_attr "type" "neon_sat_shift_imm_narrow_q")] @@ -4428,7 +4428,7 @@ if (BYTES_BIG_ENDIAN) VQSHRUN_N))] "TARGET_NEON" { - neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); + arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2"; } [(set_attr "type" "neon_sat_shift_imm_narrow_q")] @@ -4441,7 +4441,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VSHL_N))] "TARGET_NEON" { - neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); + arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2"; } [(set_attr "type" "neon_shift_imm<q>")] @@ -4454,7 +4454,7 @@ if (BYTES_BIG_ENDIAN) VQSHL_N))] "TARGET_NEON" { - neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); + arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"; } [(set_attr "type" "neon_sat_shift_imm<q>")] @@ -4467,7 +4467,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VQSHLU_N))] "TARGET_NEON" { - neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); + arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2"; } [(set_attr "type" "neon_sat_shift_imm<q>")] @@ -4481,7 +4481,7 @@ if (BYTES_BIG_ENDIAN) "TARGET_NEON" { /* The boundaries are: 0 < imm <= size. 
*/ - neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1); + arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1); return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2"; } [(set_attr "type" "neon_shift_imm_long")] @@ -4496,7 +4496,7 @@ if (BYTES_BIG_ENDIAN) VSRA_N))] "TARGET_NEON" { - neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); + arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; } [(set_attr "type" "neon_shift_acc<q>")] @@ -4510,7 +4510,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VSRI))] "TARGET_NEON" { - neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); + arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; } [(set_attr "type" "neon_shift_reg<q>")] @@ -4524,7 +4524,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VSLI))] "TARGET_NEON" { - neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode)); + arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode)); return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; } [(set_attr "type" "neon_shift_reg<q>")] diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md index 25f79b4d010ae24c14d97d9fead93db1eff42f32..14d6429ceabbad07ed80c4166a6e77cc9f1a062e 100644 --- a/gcc/config/arm/types.md +++ b/gcc/config/arm/types.md @@ -538,6 +538,10 @@ ; crypto_sha1_slow ; crypto_sha256_fast ; crypto_sha256_slow +; +; The classification below is for coprocessor instructions +; +; coproc (define_attr "type" "adc_imm,\ @@ -1071,7 +1075,8 @@ crypto_sha1_fast,\ crypto_sha1_slow,\ crypto_sha256_fast,\ - crypto_sha256_slow" + crypto_sha256_slow,\ + coproc" (const_string "untyped")) ; Is this an (integer side) multiply with a 32-bit (or smaller) result? 
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index bee8795f007accc623c82b73c41a8619ebc29209..cc8ac95413ba90b8a7ef4d6c2d3a73875030af28 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -148,6 +148,8 @@ VUNSPEC_GET_FPSCR ; Represent fetch of FPSCR content. VUNSPEC_SET_FPSCR ; Represent assign of FPSCR content. VUNSPEC_PROBE_STACK_RANGE ; Represent stack range probing. + VUNSPEC_CDP ; Represent the coprocessor cdp instruction. + VUNSPEC_CDP2 ; Represent the coprocessor cdp2 instruction. ]) ;; Enumerators for NEON unspecs. diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 45ce5c9196b2febab6bf7b1592ff79ac9e00087e..acae978b4cf726b38590521bce59b534c6185961 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -1866,7 +1866,7 @@ (float_truncate:HF (float:SF (match_dup 0))))] "TARGET_VFP_FP16INST" { - neon_const_bounds (operands[2], 1, 33); + arm_const_bounds (operands[2], 1, 33); return "vcvt.f16.<sup>32\t%0, %0, %2\;vmov.f32\t%3, %0"; } [(set_attr "conds" "unconditional") @@ -1883,7 +1883,7 @@ { rtx op1 = gen_reg_rtx (SImode); - neon_const_bounds (operands[2], 1, 33); + arm_const_bounds (operands[2], 1, 33); emit_move_insn (op1, operands[1]); emit_insn (gen_neon_vcvth<sup>_nhf_unspec (op1, op1, operands[2], @@ -1907,7 +1907,7 @@ VCVT_SI_US_N))] "TARGET_VFP_FP16INST" { - neon_const_bounds (operands[2], 1, 33); + arm_const_bounds (operands[2], 1, 33); return "vmov.f32\t%0, %1\;vcvt.<sup>%#32.f16\t%0, %0, %2"; } [(set_attr "conds" "unconditional") @@ -1925,7 +1925,7 @@ { rtx op1 = gen_reg_rtx (SImode); - neon_const_bounds (operands[2], 1, 33); + arm_const_bounds (operands[2], 1, 33); emit_insn (gen_neon_vcvth<sup>_nsi_unspec (op1, operands[1], operands[2])); emit_move_insn (operands[0], op1); DONE; diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 0669f7999beb078822e471352036d8f13517812d..5864174f52768df55a75d63ab115505b834d6290 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ 
-12579,8 +12579,9 @@ The built-in intrinsics for the Advanced SIMD extension are available when NEON is enabled. Currently, ARM and AArch64 back ends do not support ACLE 2.0 fully. Both -back ends support CRC32 intrinsics from @file{arm_acle.h}. The ARM back end's -16-bit floating-point Advanced SIMD intrinsics currently comply to ACLE v1.1. +back ends support CRC32 intrinsics and the ARM back end supports the +Coprocessor intrinsics, all from @file{arm_acle.h}. The ARM back end's 16-bit +floating-point Advanced SIMD intrinsics currently comply to ACLE v1.1. AArch64's back end does not have support for 16-bit floating point Advanced SIMD intrinsics yet. diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 07c75e2847ae37ec22ce1f6483c3d201ac001725..b66c6a18aacc537a5fe23ef23c7043da5d7cbdf0 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1675,6 +1675,21 @@ and @code{MOVT} instructions available. ARM target generates Thumb-1 code for @code{-mthumb} with @code{CBZ} and @code{CBNZ} instructions available. +@item arm_coproc1_ok +@anchor{arm_coproc1_ok} +ARM target supports the following coprocessor instruction: @code{CDP}, +@code{LDC}, @code{STC}, @code{MCR} and @code{MRC}. + +@item arm_coproc2_ok +@anchor{arm_coproc2_ok} +ARM target supports the all the coprocessor instructions also listed as +supported in @ref{arm_coproc1_ok} and the following: @code{CDP2}, @code{LDC2}, +@code{LDC2l}, @code{STC2}, @code{STC2l}, @code{MCR2} and @code{MRC2}. + +@item arm_coproc3_ok +ARM target supports the all the coprocessor instructions also listed as +supported in @ref{arm_coproc2_ok} and the following: @code{MCRR}, @code{MCRR2}, +@code{MRRC}, and @code{MRRC2}. 
@end table @subsubsection AArch64-specific attributes diff --git a/gcc/testsuite/gcc.target/arm/acle/acle.exp b/gcc/testsuite/gcc.target/arm/acle/acle.exp index 91954bdff2f8fbb140bef44edbb5f040c68b92ca..f431da677940996e031ad0693427fbd3c8a211c3 100644 --- a/gcc/testsuite/gcc.target/arm/acle/acle.exp +++ b/gcc/testsuite/gcc.target/arm/acle/acle.exp @@ -27,9 +27,26 @@ load_lib gcc-dg.exp # Initialize `dg'. dg-init +set saved-dg-do-what-default ${dg-do-what-default} +set dg-do-what-default "assemble" + +set saved-lto_torture_options ${LTO_TORTURE_OPTIONS} + +# Add -ffat-lto-objects option to all LTO options such that we can do assembly +# scans. +proc add_fat_objects { list } { + set res {} + foreach el $list {set res [lappend res [concat $el " -ffat-lto-objects"]]} + return $res +}; +set LTO_TORTURE_OPTIONS [add_fat_objects ${LTO_TORTURE_OPTIONS}] + # Main loop. -dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ "" "" +# Restore globals +set dg-do-what-default ${saved-dg-do-what-default} +set LTO_TORTURE_OPTIONS ${saved-lto_torture_options} # All done. dg-finish diff --git a/gcc/testsuite/gcc.target/arm/acle/cdp.c b/gcc/testsuite/gcc.target/arm/acle/cdp.c new file mode 100644 index 0000000000000000000000000000000000000000..28b218e7cfcdb7d6ce1381feb4c6dea3ff08a620 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/acle/cdp.c @@ -0,0 +1,14 @@ +/* Test the cdp ACLE intrinsic. 
*/ + +/* { dg-do assemble } */ +/* { dg-options "-save-temps" } */ +/* { dg-require-effective-target arm_coproc1_ok } */ + +#include "arm_acle.h" + +void test_cdp (void) +{ + __arm_cdp (10, 1, 2, 3, 4, 5); +} + +/* { dg-final { scan-assembler "cdp\tp10, #1, CR2, CR3, CR4, #5\n" } } */ diff --git a/gcc/testsuite/gcc.target/arm/acle/cdp2.c b/gcc/testsuite/gcc.target/arm/acle/cdp2.c new file mode 100644 index 0000000000000000000000000000000000000000..00bcd502b563cfe6df1e5d4c2e53f8034063d47e --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/acle/cdp2.c @@ -0,0 +1,14 @@ +/* Test the cdp2 ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-options "-save-temps" } */ +/* { dg-require-effective-target arm_coproc2_ok } */ + +#include "arm_acle.h" + +void test_cdp2 (void) +{ + __arm_cdp2 (10, 4, 3, 2, 1, 0); +} + +/* { dg-final { scan-assembler "cdp2\tp10, #4, CR3, CR2, CR1, #0\n" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index b5a9faab5a47d01371c6402a5b07d34071dbc34b..1eb65d05b4181aa64ccc1474f60491456046f465 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -8091,3 +8091,59 @@ proc check_effective_target_profile_update_atomic {} { int main (void) { return 0; } } "-fprofile-update=atomic -fprofile-generate"] } + +# Return 1 if the target supports coprocessor instructions: cdp, ldc, stc, mcr and +# mrc. 
+proc check_effective_target_arm_coproc1_ok_nocache { } { + if { ![istarget arm*-*-*] } { + return 0 + } + return [check_no_compiler_messages_nocache arm_coproc1_ok assembly { + #if (__thumb__ && !__thumb2__) || __ARM_ARCH < 4 + #error FOO + #endif + }] +} + +proc check_effective_target_arm_coproc1_ok { } { + return [check_cached_effective_target arm_coproc1_ok \ + check_effective_target_arm_coproc1_ok_nocache] +} + +# Return 1 if the target supports all coprocessor instructions checked by +# check_effective_target_arm_coproc1_ok and the following: cdp2, ldc2, ldc2l, +# stc2, stc2l, mcr2 and mrc2. +proc check_effective_target_arm_coproc2_ok_nocache { } { + if { ![check_effective_target_arm_coproc1_ok] } { + return 0 + } + return [check_no_compiler_messages_nocache arm_coproc2_ok assembly { + #if __ARM_ARCH < 5 + #error FOO + #endif + }] +} + +proc check_effective_target_arm_coproc2_ok { } { + return [check_cached_effective_target arm_coproc2_ok \ + check_effective_target_arm_coproc2_ok_nocache] +} + +# Return 1 if the target supports all coprocessor instructions checked by +# check_effective_target_arm_coproc2_ok and the following: mcrr, mcrr2, mrrc +# and mrrc2. +proc check_effective_target_arm_coproc3_ok_nocache { } { + if { ![check_effective_target_arm_coproc2_ok] } { + return 0 + } + return [check_no_compiler_messages_nocache arm_coproc3_ok assembly { + #if __ARM_ARCH < 6 && !defined (__ARM_ARCH_5TE) + #error FOO + #endif + }] +} + +proc check_effective_target_arm_coproc3_ok { } { + return [check_cached_effective_target arm_coproc3_ok \ + check_effective_target_arm_coproc3_ok_nocache] +}