Hi Richard,
> -----Original Message-----
> From: [email protected] <[email protected]>
> Sent: 06 January 2026 01:39
> To: [email protected]
> Cc: Richard Earnshaw <[email protected]>; Tamar Christina
> <[email protected]>; [email protected]; Wilco Dijkstra
> <[email protected]>; Alex Coplan <[email protected]>; Richard
> Ball <[email protected]>
> Subject: [PATCH v4 1/2] aarch64: Add support for FEAT_PCDPHINT
> atomic_store intrinsics.
>
> From: Richard Ball <[email protected]>
>
> This patch adds support for the atomic_store_with_stshh intrinsic
> in aarch64. This intrinsic is part of FEAT_PCDPHINT.
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-builtins.cc
> (enum aarch64_builtins): Add new flags.
> (aarch64_init_pcdphint_builtins): Create new Builtin functions.
> (aarch64_general_init_builtins): Call init for PCDPHINT.
> (aarch64_expand_stshh_builtin): Expander for new intrinsic.
> (aarch64_general_expand_builtin): Call new expander.
> * config/aarch64/aarch64-c.cc
> (aarch64_update_cpp_builtins): New feature.
> * config/aarch64/aarch64.h (TARGET_PCDPHINT): Likewise.
> * config/aarch64/arm_acle.h
> (__atomic_store_with_stshh): Generic to call builtins.
> * config/aarch64/atomics.md
> (@aarch64_atomic_store_stshh<mode>): New pattern for intrinsic.
> * config/aarch64/iterators.md: New UNSPEC.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/atomic_store_with_stshh.c: New test.
> ---
> gcc/config/aarch64/aarch64-builtins.cc | 125 ++++++++++++
> gcc/config/aarch64/aarch64-c.cc | 1 +
> gcc/config/aarch64/aarch64.h | 3 +
> gcc/config/aarch64/arm_acle.h | 22 +++
> gcc/config/aarch64/atomics.md | 24 +++
> gcc/config/aarch64/iterators.md | 1 +
> .../aarch64/atomic_store_with_stshh.c | 185 ++++++++++++++++++
> 7 files changed, 361 insertions(+)
> create mode 100644
> gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc
> b/gcc/config/aarch64/aarch64-builtins.cc
> index 74388963a3d..6ba2c9981fe 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -901,6 +901,13 @@ enum aarch64_builtins
> AARCH64_BUILTIN_GCSPR,
> AARCH64_BUILTIN_GCSPOPM,
> AARCH64_BUILTIN_GCSSS,
> + /* Armv9.6-A builtins. */
> + AARCH64_BUILTIN_STSHH_QI,
> + AARCH64_BUILTIN_STSHH_HI,
> + AARCH64_BUILTIN_STSHH_SI,
> + AARCH64_BUILTIN_STSHH_DI,
> + AARCH64_BUILTIN_STSHH_SF,
> + AARCH64_BUILTIN_STSHH_DF,
> AARCH64_BUILTIN_MAX
> };
>
> @@ -2466,6 +2473,62 @@ aarch64_init_gcs_builtins (void)
> AARCH64_BUILTIN_GCSSS);
> }
>
> +/* Add builtins for FEAT_PCDPHINT. */
> +
> +static void
> +aarch64_init_pcdphint_builtins (void)
> +{
> + tree ftype;
> +
> + ftype = build_function_type_list (void_type_node, ptr_type_node,
> + unsigned_char_type_node,
> + unsigned_type_node,
> + unsigned_type_node, NULL_TREE);
> + aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_QI]
> + = aarch64_general_add_builtin ("__builtin_aarch64_stshh_qi", ftype,
> + AARCH64_BUILTIN_STSHH_QI);
> +
> + ftype = build_function_type_list (void_type_node, ptr_type_node,
> + short_unsigned_type_node,
> + unsigned_type_node,
> + unsigned_type_node, NULL_TREE);
> + aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_HI]
> + = aarch64_general_add_builtin ("__builtin_aarch64_stshh_hi", ftype,
> + AARCH64_BUILTIN_STSHH_HI);
> +
> + ftype = build_function_type_list (void_type_node, ptr_type_node,
> + unsigned_type_node,
> + unsigned_type_node,
> + unsigned_type_node, NULL_TREE);
> + aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_SI]
> + = aarch64_general_add_builtin ("__builtin_aarch64_stshh_si", ftype,
> + AARCH64_BUILTIN_STSHH_SI);
> +
> + ftype = build_function_type_list (void_type_node, ptr_type_node,
> + long_long_unsigned_type_node,
> + unsigned_type_node,
> + unsigned_type_node, NULL_TREE);
> + aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_DI]
> + = aarch64_general_add_builtin ("__builtin_aarch64_stshh_di", ftype,
> + AARCH64_BUILTIN_STSHH_DI);
> +
> + ftype = build_function_type_list (void_type_node, ptr_type_node,
> + float_type_node,
> + unsigned_type_node,
> + unsigned_type_node, NULL_TREE);
> + aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_SF]
> + = aarch64_general_add_builtin ("__builtin_aarch64_stshh_sf", ftype,
> + AARCH64_BUILTIN_STSHH_SF);
> +
> + ftype = build_function_type_list (void_type_node, ptr_type_node,
> + double_type_node,
> + unsigned_type_node,
> + unsigned_type_node, NULL_TREE);
> + aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_DF]
> + = aarch64_general_add_builtin ("__builtin_aarch64_stshh_df", ftype,
> + AARCH64_BUILTIN_STSHH_DF);
> +}
> +
> /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. */
>
> void
> @@ -2514,6 +2577,7 @@ aarch64_general_init_builtins (void)
> AARCH64_BUILTIN_CHKFEAT);
>
> aarch64_init_gcs_builtins ();
> + aarch64_init_pcdphint_builtins ();
>
> if (in_lto_p)
> handle_arm_acle_h ();
> @@ -3968,6 +4032,58 @@ aarch64_expand_tbl_tbx (vec<expand_operand>
> &ops, int unspec)
> return result;
> }
>
> +void
> +aarch64_expand_stshh_builtin (tree exp, int fcode)
> +{
> + machine_mode mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp,
> 1)));
> + rtx val = expand_normal (CALL_EXPR_ARG (exp, 1));
> + rtx mem_order = expand_normal (CALL_EXPR_ARG (exp, 2));
> + rtx ret = expand_normal (CALL_EXPR_ARG (exp, 3));
> +
> + require_const_argument (exp, 3, 0, 2);
> + require_const_argument (exp, 2, 0, 6);
> + if (seen_error ())
> + return;
> +
> + switch (fcode)
> + {
> + case AARCH64_BUILTIN_STSHH_SF:
> + {
> + val = force_lowpart_subreg (SImode, val, SFmode);
> + mode = SImode;
> + break;
> + }
> + case AARCH64_BUILTIN_STSHH_DF:
> + {
> + val = force_lowpart_subreg (DImode, val, DFmode);
> + mode = DImode;
> + break;
> + }
> + default:
> + {
> + if (CONST_INT_P (val) && INTVAL (val) == 0)
> + val = const0_rtx;
This looks odd: it says that if val is zero, set it to zero, which is redundant?
> + else if (!register_operand (val, mode))
> + val = force_reg (mode, val);
> + break;
Perhaps you wanted

  if (val != CONST0_RTX (mode))
    val = force_reg (mode, val);

instead?  force_reg will be a no-op if val is already a register of that mode.
> + }
> + }
> +
> + rtx addr = expand_normal (CALL_EXPR_ARG (exp, 0));
> + addr = convert_memory_address (Pmode, addr);
> + rtx mem = gen_rtx_MEM (mode, addr);
> + set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
> +
> + expand_operand ops[4];
> + enum insn_code icode = code_for_aarch64_atomic_store_stshh (mode);
> + create_input_operand (&ops[0], mem, mode);
Should this be create_output_operand? It's a SET destination.
Thanks,
Tamar
> + create_input_operand (&ops[1], val, mode);
> + create_input_operand (&ops[2], mem_order, SImode);
> + create_input_operand (&ops[3], ret, SImode);
> +
> + expand_insn (icode, 4, ops);
> +}
> +
> /* Expand CALL_EXPR EXP, given that it is a call to the function described
> by BUILTIN_DATA, and return the function's return value. Put the result
> in TARGET if convenient. */
> @@ -4458,6 +4574,15 @@ aarch64_general_expand_builtin (unsigned int
> fcode, tree exp, rtx target,
> case AARCH64_BUILTIN_GCSPOPM:
> case AARCH64_BUILTIN_GCSSS:
> return aarch64_expand_gcs_builtin (exp, target, fcode, ignore);
> +
> + case AARCH64_BUILTIN_STSHH_QI:
> + case AARCH64_BUILTIN_STSHH_HI:
> + case AARCH64_BUILTIN_STSHH_SI:
> + case AARCH64_BUILTIN_STSHH_DI:
> + case AARCH64_BUILTIN_STSHH_SF:
> + case AARCH64_BUILTIN_STSHH_DF:
> + aarch64_expand_stshh_builtin (exp, fcode);
> + return target;
> }
>
> if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <=
> AARCH64_SIMD_BUILTIN_MAX)
> diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-
> c.cc
> index ee539531d36..41df1e83888 100644
> --- a/gcc/config/aarch64/aarch64-c.cc
> +++ b/gcc/config/aarch64/aarch64-c.cc
> @@ -307,6 +307,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
> aarch64_def_or_undef (AARCH64_HAVE_ISA (SME2p1),
> "__ARM_FEATURE_SME2p1", pfile);
> aarch64_def_or_undef (TARGET_FAMINMAX,
> "__ARM_FEATURE_FAMINMAX", pfile);
> + aarch64_def_or_undef (TARGET_PCDPHINT,
> "__ARM_FEATURE_PCDPHINT", pfile);
>
> // Function multi-versioning defines
> aarch64_def_or_undef (targetm.has_ifunc_p (),
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 2b7d266de10..03802f07e1c 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -407,6 +407,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE
> ATTRIBUTE_UNUSED
> but are incompatible with -mtrack-speculation. */
> #define TARGET_CMPBR (AARCH64_HAVE_ISA (CMPBR) &&
> !aarch64_track_speculation)
>
> +/* PCDPHINT instructions are enabled through +pcdphint. */
> +#define TARGET_PCDPHINT AARCH64_HAVE_ISA (PCDPHINT)
> +
> /* Make sure this is always defined so we don't have to check for ifdefs
> but rather use normal ifs. */
> #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
> index 5c601b845fa..e01c057b8cb 100644
> --- a/gcc/config/aarch64/arm_acle.h
> +++ b/gcc/config/aarch64/arm_acle.h
> @@ -132,6 +132,28 @@ __sqrtf (float __x)
> return __builtin_aarch64_sqrtsf (__x);
> }
>
> +#define __atomic_store_with_stshh(addr, value, memory_order, ret) \
> +({ \
> + __auto_type ptr = (addr); \
> + typedef __typeof__ (*ptr) ptr_type;
> \
> + _Generic ((*ptr), \
> + char: __builtin_aarch64_stshh_qi, \
> + unsigned char: __builtin_aarch64_stshh_qi, \
> + signed char: __builtin_aarch64_stshh_qi, \
> + unsigned short: __builtin_aarch64_stshh_hi, \
> + short: __builtin_aarch64_stshh_hi, \
> + unsigned int: __builtin_aarch64_stshh_si, \
> + int: __builtin_aarch64_stshh_si, \
> + unsigned long: __builtin_aarch64_stshh_di, \
> + long: __builtin_aarch64_stshh_di, \
> + unsigned long long: __builtin_aarch64_stshh_di, \
> + long long: __builtin_aarch64_stshh_di, \
> + float: __builtin_aarch64_stshh_sf, \
> + double: __builtin_aarch64_stshh_df, \
> + default: __builtin_aarch64_stshh_di \
> + )((ptr), (ptr_type)(value), (memory_order), (ret)); \
> +})
> +
> #pragma GCC push_options
> #pragma GCC target ("+nothing+jscvt")
> __extension__ extern __inline int32_t
> diff --git a/gcc/config/aarch64/atomics.md
> b/gcc/config/aarch64/atomics.md
> index e3aa6773fb6..c9534d43c0f 100644
> --- a/gcc/config/aarch64/atomics.md
> +++ b/gcc/config/aarch64/atomics.md
> @@ -751,6 +751,30 @@
> [(set_attr "arch" "*,rcpc8_4")]
> )
>
> +(define_insn "@aarch64_atomic_store_stshh<mode>"
> + [(set (match_operand:ALLI 0 "aarch64_rcpc_memory_operand" "=Q,Ust")
> + (unspec_volatile:ALLI
> + [(match_operand:ALLI 1 "aarch64_reg_or_zero" "rZ,rZ")
> + (match_operand:SI 2 "const_int_operand") ;;
> model
> + (match_operand:SI 3 "const_int_operand")] ;; ret_policy
> + UNSPECV_STSHH))]
> + ""
> + {
> + if (INTVAL (operands[3]) == 0)
> + output_asm_insn ("stshh\tkeep", operands);
> + else
> + output_asm_insn ("stshh\tstrm", operands);
> + enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
> + if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire
> (model))
> + return "str<atomic_sfx>\t%<w>1, %0";
> + else if (which_alternative == 0)
> + return "stlr<atomic_sfx>\t%<w>1, %0";
> + else
> + return "stlur<atomic_sfx>\t%<w>1, %0";
> + }
> + [(set_attr "arch" "*,rcpc8_4")]
> +)
> +
> (define_insn "@aarch64_load_exclusive<mode>"
> [(set (match_operand:SI 0 "register_operand" "=r")
> (zero_extend:SI
> diff --git a/gcc/config/aarch64/iterators.md
> b/gcc/config/aarch64/iterators.md
> index 569c0876fab..588c89c8a17 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1336,6 +1336,7 @@
> UNSPECV_LDA ; Represent an atomic load or load-
> acquire.
> UNSPECV_LDAP ; Represent an atomic acquire load with RCpc
> semantics.
> UNSPECV_STL ; Represent an atomic store or store-
> release.
> + UNSPECV_STSHH ; Represent an atomic store with an stshh
> hint.
> UNSPECV_ATOMIC_CMPSW ; Represent an atomic compare swap.
> UNSPECV_ATOMIC_EXCHG ; Represent an atomic exchange.
> UNSPECV_ATOMIC_CAS ; Represent an atomic CAS.
> diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c
> b/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c
> new file mode 100644
> index 00000000000..9be42574b9c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c
> @@ -0,0 +1,185 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=armv8-a -save-temps" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_acle.h>
> +
> +/*
> +** testFun1:
> +** ...
> +** stshh keep
> +** strb w[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun1 ()
> +{
> + char item1 = 0;
> + char* ptr1 = &item1;
> + char test1 = 1;
> +
> + __atomic_store_with_stshh (ptr1, test1, __ATOMIC_RELAXED, 0);
> +}
> +
> +/*
> +** testFun2:
> +** ...
> +** stshh keep
> +** stlrh w[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun2 ()
> +{
> + short item2 = 10;
> + short* ptr2 = &item2;
> + short test2 = 11;
> + __atomic_store_with_stshh (ptr2, test2, __ATOMIC_RELEASE, 0);
> +}
> +
> +/*
> +** testFun3:
> +** ...
> +** stshh strm
> +** stlr w[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun3 ()
> +{
> + unsigned int item3 = 10;
> + unsigned int* ptr3 = &item3;
> + unsigned int test3 = 11;
> + __atomic_store_with_stshh (ptr3, test3, __ATOMIC_SEQ_CST, 1);
> +}
> +
> +/*
> +** testFun4:
> +** ...
> +** stshh strm
> +** str x[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun4 ()
> +{
> + long item4 = 10;
> + long* ptr4 = &item4;
> + long test4 = 11;
> + __atomic_store_with_stshh (ptr4, test4, __ATOMIC_RELAXED, 1);
> +}
> +
> +/*
> +** testFun5:
> +** ...
> +** stshh keep
> +** stlr x[0-9]+, \[sp\]
> +** ...
> +*/
> +void
> +testFun5 ()
> +{
> + long item5 = 10;
> + long* ptr5 = &item5;
> + long test5item = 11;
> + long* test5 = &test5item;
> + __atomic_store_with_stshh (ptr5, test5, __ATOMIC_SEQ_CST, 0);
> +}
> +
> +/*
> +** testFun6:
> +** ...
> +** stshh keep
> +** stlr w[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun6 ()
> +{
> + float item6 = 10;
> + float* ptr6 = &item6;
> + float test6 = 11;
> + __atomic_store_with_stshh (ptr6, test6, __ATOMIC_SEQ_CST, 0);
> +}
> +
> +/*
> +** testFun7:
> +** ...
> +** stshh strm
> +** str x[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun7 ()
> +{
> + double item7 = 10;
> + double* ptr7 = &item7;
> + double test7 = 11;
> + __atomic_store_with_stshh (ptr7, test7, __ATOMIC_RELAXED, 1);
> +}
> +
> +/*
> +** testFun8:
> +** ...
> +** stshh keep
> +** strb w[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun8 ()
> +{
> + char item8 = 0;
> + char* ptr8 = &item8;
> + long test8 = 1;
> +
> + __atomic_store_with_stshh (ptr8, test8, __ATOMIC_RELAXED, 0);
> +}
> +
> +/*
> +** testFun9:
> +** ...
> +** stshh strm
> +** str w[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun9 ()
> +{
> + int item9 = 0;
> + int* ptr9 = &item9;
> + float test9 = 1;
> +
> + __atomic_store_with_stshh (ptr9, test9, __ATOMIC_RELAXED, 1);
> +}
> +
> +/*
> +** testFun10:
> +** ...
> +** add (x[0-9]+), \1, 1
> +** mov (w[0-9]+), 7
> +** stshh strm
> +** strb \2, \[\1\]
> +** ...
> +*/
> +static char buf[8];
> +void
> +testFun10 (void)
> +{
> + __atomic_store_with_stshh((buf + 1), (char)7, __ATOMIC_RELAXED, 1);
> +}
> +
> +/*
> +** testFun11:
> +** ...
> +** stshh strm
> +** str wzr, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun11 ()
> +{
> + int item11 = 10;
> + int* ptr11 = &item11;
> +
> + __atomic_store_with_stshh (ptr11, 0, __ATOMIC_RELAXED, 1);
> +}
> --
> 2.34.1