https://gcc.gnu.org/g:cf4b35376406909d3915a6450d5c90623517e9f0
commit r16-6521-gcf4b35376406909d3915a6450d5c90623517e9f0
Author: Richard Ball <[email protected]>
Date:   Tue Jan 6 14:26:20 2026 +0000

    aarch64: Add support for FEAT_PCDPHINT atomic_store intrinsics.

    This patch adds support for the atomic_store_with_stshh intrinsic
    in aarch64.  This intrinsic is part of FEAT_PCDPHINT.

    gcc/ChangeLog:

            * config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
            Add new flags.
            (aarch64_init_pcdphint_builtins): Create new Builtin functions.
            (aarch64_general_init_builtins): Call init for PCDPHINT.
            (aarch64_expand_stshh_builtin): Expander for new intrinsic.
            (aarch64_general_expand_builtin): Call new expander.
            * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins):
            New feature.
            * config/aarch64/aarch64.h (TARGET_PCDPHINT): Likewise.
            * config/aarch64/arm_acle.h (__atomic_store_with_stshh):
            Generic to call builtins.
            * config/aarch64/atomics.md (@aarch64_atomic_store_stshh<mode>):
            New pattern for intrinsic.
            * config/aarch64/iterators.md: New UNSPEC.

    gcc/testsuite/ChangeLog:

            * gcc.target/aarch64/atomic_store_with_stshh.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc             | 123 ++++++++++++++
 gcc/config/aarch64/aarch64-c.cc                    |   1 +
 gcc/config/aarch64/aarch64.h                       |   3 +
 gcc/config/aarch64/arm_acle.h                      |  22 +++
 gcc/config/aarch64/atomics.md                      |  24 +++
 gcc/config/aarch64/iterators.md                    |   1 +
 .../gcc.target/aarch64/atomic_store_with_stshh.c   | 185 +++++++++++++++++++++
 7 files changed, 359 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 74388963a3d5..8ad2c6ec95b7 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -901,6 +901,13 @@ enum aarch64_builtins
   AARCH64_BUILTIN_GCSPR,
   AARCH64_BUILTIN_GCSPOPM,
   AARCH64_BUILTIN_GCSSS,
+  /* Armv9.6-A builtins.  */
+  AARCH64_BUILTIN_STSHH_QI,
+  AARCH64_BUILTIN_STSHH_HI,
+  AARCH64_BUILTIN_STSHH_SI,
+  AARCH64_BUILTIN_STSHH_DI,
+  AARCH64_BUILTIN_STSHH_SF,
+  AARCH64_BUILTIN_STSHH_DF,
   AARCH64_BUILTIN_MAX
 };
 
@@ -2466,6 +2473,62 @@ aarch64_init_gcs_builtins (void)
			      AARCH64_BUILTIN_GCSSS);
 }
 
+/* Add builtins for FEAT_PCDPHINT.  */
+
+static void
+aarch64_init_pcdphint_builtins (void)
+{
+  tree ftype;
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node,
+				    unsigned_char_type_node,
+				    unsigned_type_node,
+				    unsigned_type_node, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_QI]
+    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_qi", ftype,
+				   AARCH64_BUILTIN_STSHH_QI);
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node,
+				    short_unsigned_type_node,
+				    unsigned_type_node,
+				    unsigned_type_node, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_HI]
+    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_hi", ftype,
+				   AARCH64_BUILTIN_STSHH_HI);
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node,
+				    unsigned_type_node,
+				    unsigned_type_node,
+				    unsigned_type_node, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_SI]
+    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_si", ftype,
+				   AARCH64_BUILTIN_STSHH_SI);
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node,
+				    long_long_unsigned_type_node,
+				    unsigned_type_node,
+				    unsigned_type_node, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_DI]
+    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_di", ftype,
+				   AARCH64_BUILTIN_STSHH_DI);
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node,
+				    float_type_node,
+				    unsigned_type_node,
+				    unsigned_type_node, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_SF]
+    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_sf", ftype,
+				   AARCH64_BUILTIN_STSHH_SF);
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node,
+				    double_type_node,
+				    unsigned_type_node,
+				    unsigned_type_node, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_DF]
+    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_df", ftype,
+				   AARCH64_BUILTIN_STSHH_DF);
+}
+
 /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group.  */
 
 void
@@ -2514,6 +2577,7 @@ aarch64_general_init_builtins (void)
					  AARCH64_BUILTIN_CHKFEAT);
 
   aarch64_init_gcs_builtins ();
+  aarch64_init_pcdphint_builtins ();
 
   if (in_lto_p)
     handle_arm_acle_h ();
@@ -3968,6 +4032,56 @@ aarch64_expand_tbl_tbx (vec<expand_operand> &ops, int unspec)
   return result;
 }
 
+void
+aarch64_expand_stshh_builtin (tree exp, int fcode)
+{
+  machine_mode mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, 1)));
+  rtx val = expand_normal (CALL_EXPR_ARG (exp, 1));
+  rtx mem_order = expand_normal (CALL_EXPR_ARG (exp, 2));
+  rtx ret = expand_normal (CALL_EXPR_ARG (exp, 3));
+
+  require_const_argument (exp, 3, 0, 2);
+  require_const_argument (exp, 2, 0, 6);
+  if (seen_error ())
+    return;
+
+  switch (fcode)
+    {
+    case AARCH64_BUILTIN_STSHH_SF:
+      {
+	val = force_lowpart_subreg (SImode, val, SFmode);
+	mode = SImode;
+	break;
+      }
+    case AARCH64_BUILTIN_STSHH_DF:
+      {
+	val = force_lowpart_subreg (DImode, val, DFmode);
+	mode = DImode;
+	break;
+      }
+    default:
+      {
+	if (val != CONST0_RTX (mode))
+	  val = force_reg (mode, val);
+	break;
+      }
+    }
+
+  rtx addr = expand_normal (CALL_EXPR_ARG (exp, 0));
+  addr = convert_memory_address (Pmode, addr);
+  rtx mem = gen_rtx_MEM (mode, addr);
+  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
+
+  expand_operand ops[4];
+  enum insn_code icode = code_for_aarch64_atomic_store_stshh (mode);
+  create_output_operand (&ops[0], mem, mode);
+  create_input_operand (&ops[1], val, mode);
+  create_input_operand (&ops[2], mem_order, SImode);
+  create_input_operand (&ops[3], ret, SImode);
+
+  expand_insn (icode, 4, ops);
+}
+
 /* Expand CALL_EXPR EXP, given that it is a call to the function described
    by BUILTIN_DATA, and return the function's return value.  Put the result
    in TARGET if convenient.  */
@@ -4458,6 +4572,15 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
     case AARCH64_BUILTIN_GCSPOPM:
     case AARCH64_BUILTIN_GCSSS:
       return aarch64_expand_gcs_builtin (exp, target, fcode, ignore);
+
+    case AARCH64_BUILTIN_STSHH_QI:
+    case AARCH64_BUILTIN_STSHH_HI:
+    case AARCH64_BUILTIN_STSHH_SI:
+    case AARCH64_BUILTIN_STSHH_DI:
+    case AARCH64_BUILTIN_STSHH_SF:
+    case AARCH64_BUILTIN_STSHH_DF:
+      aarch64_expand_stshh_builtin (exp, fcode);
+      return target;
     }
 
   if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index ee539531d364..41df1e838883 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -307,6 +307,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
   aarch64_def_or_undef (AARCH64_HAVE_ISA (SME2p1),
			 "__ARM_FEATURE_SME2p1", pfile);
   aarch64_def_or_undef (TARGET_FAMINMAX, "__ARM_FEATURE_FAMINMAX", pfile);
+  aarch64_def_or_undef (TARGET_PCDPHINT, "__ARM_FEATURE_PCDPHINT", pfile);
 
   // Function multi-versioning defines
   aarch64_def_or_undef (targetm.has_ifunc_p (),
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 2b7d266de101..03802f07e1c5 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -407,6 +407,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
    but are incompatible with -mtrack-speculation.  */
 #define TARGET_CMPBR (AARCH64_HAVE_ISA (CMPBR) && !aarch64_track_speculation)
 
+/* PCDPHINT instructions are enabled through +pcdphint.  */
+#define TARGET_PCDPHINT AARCH64_HAVE_ISA (PCDPHINT)
+
 /* Make sure this is always defined so we don't have to check for ifdefs
    but rather use normal ifs.  */
 #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index 5c601b845fa7..e01c057b8cb0 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -132,6 +132,28 @@ __sqrtf (float __x)
   return __builtin_aarch64_sqrtsf (__x);
 }
 
+#define __atomic_store_with_stshh(addr, value, memory_order, ret) \
+({ \
+  __auto_type ptr = (addr); \
+  typedef __typeof__ (*ptr) ptr_type; \
+  _Generic ((*ptr), \
+    char: __builtin_aarch64_stshh_qi, \
+    unsigned char: __builtin_aarch64_stshh_qi, \
+    signed char: __builtin_aarch64_stshh_qi, \
+    unsigned short: __builtin_aarch64_stshh_hi, \
+    short: __builtin_aarch64_stshh_hi, \
+    unsigned int: __builtin_aarch64_stshh_si, \
+    int: __builtin_aarch64_stshh_si, \
+    unsigned long: __builtin_aarch64_stshh_di, \
+    long: __builtin_aarch64_stshh_di, \
+    unsigned long long: __builtin_aarch64_stshh_di, \
+    long long: __builtin_aarch64_stshh_di, \
+    float: __builtin_aarch64_stshh_sf, \
+    double: __builtin_aarch64_stshh_df, \
+    default: __builtin_aarch64_stshh_di \
+  )((ptr), (ptr_type)(value), (memory_order), (ret)); \
+})
+
 #pragma GCC push_options
 #pragma GCC target ("+nothing+jscvt")
 __extension__ extern __inline int32_t
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index e3aa6773fb6b..c9534d43c0fa 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -751,6 +751,30 @@
   [(set_attr "arch" "*,rcpc8_4")]
 )
 
+(define_insn "@aarch64_atomic_store_stshh<mode>"
+  [(set (match_operand:ALLI 0 "aarch64_rcpc_memory_operand" "=Q,Ust")
+    (unspec_volatile:ALLI
+      [(match_operand:ALLI 1 "aarch64_reg_or_zero" "rZ,rZ")
+       (match_operand:SI 2 "const_int_operand")	;; model
+       (match_operand:SI 3 "const_int_operand")]	;; ret_policy
+      UNSPECV_STSHH))]
+  ""
+  {
+    if (INTVAL (operands[3]) == 0)
+      output_asm_insn ("stshh\tkeep", operands);
+    else
+      output_asm_insn ("stshh\tstrm", operands);
+    enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+    if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model))
+      return "str<atomic_sfx>\t%<w>1, %0";
+    else if (which_alternative == 0)
+      return "stlr<atomic_sfx>\t%<w>1, %0";
+    else
+      return "stlur<atomic_sfx>\t%<w>1, %0";
+  }
+  [(set_attr "arch" "*,rcpc8_4")]
+)
+
 (define_insn "@aarch64_load_exclusive<mode>"
   [(set (match_operand:SI 0 "register_operand" "=r")
     (zero_extend:SI
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 569c0876fabf..588c89c8a176 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1336,6 +1336,7 @@
     UNSPECV_LDA			; Represent an atomic load or load-acquire.
     UNSPECV_LDAP		; Represent an atomic acquire load with RCpc semantics.
     UNSPECV_STL			; Represent an atomic store or store-release.
+    UNSPECV_STSHH		; Represent an atomic store with an stshh hint.
     UNSPECV_ATOMIC_CMPSW	; Represent an atomic compare swap.
     UNSPECV_ATOMIC_EXCHG	; Represent an atomic exchange.
     UNSPECV_ATOMIC_CAS		; Represent an atomic CAS.
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c b/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c
new file mode 100644
index 000000000000..9be42574b9c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c
@@ -0,0 +1,185 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_acle.h>
+
+/*
+** testFun1:
+**	...
+**	stshh	keep
+**	strb	w[0-9]+, \[x[0-9]+\]
+**	...
+*/
+void
+testFun1 ()
+{
+  char item1 = 0;
+  char* ptr1 = &item1;
+  char test1 = 1;
+
+  __atomic_store_with_stshh (ptr1, test1, __ATOMIC_RELAXED, 0);
+}
+
+/*
+** testFun2:
+**	...
+**	stshh	keep
+**	stlrh	w[0-9]+, \[x[0-9]+\]
+**	...
+*/
+void
+testFun2 ()
+{
+  short item2 = 10;
+  short* ptr2 = &item2;
+  short test2 = 11;
+  __atomic_store_with_stshh (ptr2, test2, __ATOMIC_RELEASE, 0);
+}
+
+/*
+** testFun3:
+**	...
+**	stshh	strm
+**	stlr	w[0-9]+, \[x[0-9]+\]
+**	...
+*/
+void
+testFun3 ()
+{
+  unsigned int item3 = 10;
+  unsigned int* ptr3 = &item3;
+  unsigned int test3 = 11;
+  __atomic_store_with_stshh (ptr3, test3, __ATOMIC_SEQ_CST, 1);
+}
+
+/*
+** testFun4:
+**	...
+**	stshh	strm
+**	str	x[0-9]+, \[x[0-9]+\]
+**	...
+*/
+void
+testFun4 ()
+{
+  long item4 = 10;
+  long* ptr4 = &item4;
+  long test4 = 11;
+  __atomic_store_with_stshh (ptr4, test4, __ATOMIC_RELAXED, 1);
+}
+
+/*
+** testFun5:
+**	...
+**	stshh	keep
+**	stlr	x[0-9]+, \[sp\]
+**	...
+*/
+void
+testFun5 ()
+{
+  long item5 = 10;
+  long* ptr5 = &item5;
+  long test5item = 11;
+  long* test5 = &test5item;
+  __atomic_store_with_stshh (ptr5, test5, __ATOMIC_SEQ_CST, 0);
+}
+
+/*
+** testFun6:
+**	...
+**	stshh	keep
+**	stlr	w[0-9]+, \[x[0-9]+\]
+**	...
+*/
+void
+testFun6 ()
+{
+  float item6 = 10;
+  float* ptr6 = &item6;
+  float test6 = 11;
+  __atomic_store_with_stshh (ptr6, test6, __ATOMIC_SEQ_CST, 0);
+}
+
+/*
+** testFun7:
+**	...
+**	stshh	strm
+**	str	x[0-9]+, \[x[0-9]+\]
+**	...
+*/
+void
+testFun7 ()
+{
+  double item7 = 10;
+  double* ptr7 = &item7;
+  double test7 = 11;
+  __atomic_store_with_stshh (ptr7, test7, __ATOMIC_RELAXED, 1);
+}
+
+/*
+** testFun8:
+**	...
+**	stshh	keep
+**	strb	w[0-9]+, \[x[0-9]+\]
+**	...
+*/
+void
+testFun8 ()
+{
+  char item8 = 0;
+  char* ptr8 = &item8;
+  long test8 = 1;
+
+  __atomic_store_with_stshh (ptr8, test8, __ATOMIC_RELAXED, 0);
+}
+
+/*
+** testFun9:
+**	...
+**	stshh	strm
+**	str	w[0-9]+, \[x[0-9]+\]
+**	...
+*/
+void
+testFun9 ()
+{
+  int item9 = 0;
+  int* ptr9 = &item9;
+  float test9 = 1;
+
+  __atomic_store_with_stshh (ptr9, test9, __ATOMIC_RELAXED, 1);
+}
+
+/*
+** testFun10:
+**	...
+**	add	(x[0-9]+), \1, 1
+**	mov	(w[0-9]+), 7
+**	stshh	strm
+**	strb	\2, \[\1\]
+**	...
+*/
+static char buf[8];
+void
+testFun10 (void)
+{
+  __atomic_store_with_stshh((buf + 1), (char)7, __ATOMIC_RELAXED, 1);
+}
+
+/*
+** testFun11:
+**	...
+**	stshh	strm
+**	str	wzr, \[x[0-9]+\]
+**	...
+*/
+void
+testFun11 ()
+{
+  int item11 = 10;
+  int* ptr11 = &item11;
+
+  __atomic_store_with_stshh (ptr11, 0, __ATOMIC_RELAXED, 1);
+}
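
Usage sketch (not part of the commit): the intrinsic added above is reached
through the arm_acle.h macro, whose signature is
__atomic_store_with_stshh (addr, value, memory_order, ret).  Per the
atomics.md pattern and the tests, a final argument of 0 emits "stshh keep"
and 1 emits "stshh strm" ahead of the store, while the memory-order argument
selects a plain store for relaxed/consume/acquire and a store-release
otherwise.  The example below is illustrative only; the function and
variable names are hypothetical, and it assumes a GCC build containing this
patch (the tests above compile with just -O2 -march=armv8-a).

  /* Hypothetical example of the new ACLE-style intrinsic.  */
  #include <arm_acle.h>

  void
  publish_flag (int *flag)
  {
    /* Release-store 1 to *flag; the preceding STSHH hint uses the "keep"
       retention policy because the last argument is 0.  */
    __atomic_store_with_stshh (flag, 1, __ATOMIC_RELEASE, 0);
  }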
