Hi Richard,

> -----Original Message-----
> From: [email protected] <[email protected]>
> Sent: 06 January 2026 01:39
> To: [email protected]
> Cc: Richard Earnshaw <[email protected]>; Tamar Christina
> <[email protected]>; [email protected]; Wilco Dijkstra
> <[email protected]>; Alex Coplan <[email protected]>; Richard
> Ball <[email protected]>
> Subject: [PATCH v4 1/2] aarch64: Add support for FEAT_PCDPHINT
> atomic_store intrinsics.
> 
> From: Richard Ball <[email protected]>
> 
> This patch adds support for the atomic_store_with_stshh intrinsic
> in aarch64. This intrinsic is part of FEAT_PCDPHINT.
> 
> gcc/ChangeLog:
> 
>       * config/aarch64/aarch64-builtins.cc
>       (enum aarch64_builtins): Add new flags.
>       (aarch64_init_pcdphint_builtins): Create new Builtin functions.
>       (aarch64_general_init_builtins): Call init for PCDPHINT.
>       (aarch64_expand_stshh_builtin): Expander for new intrinsic.
>       (aarch64_general_expand_builtin): Call new expander.
>       * config/aarch64/aarch64-c.cc
>       (aarch64_update_cpp_builtins): New feature.
>       * config/aarch64/aarch64.h (TARGET_PCDPHINT): Likewise.
>       * config/aarch64/arm_acle.h
>       (__atomic_store_with_stshh): Generic to call builtins.
>       * config/aarch64/atomics.md
>       (@aarch64_atomic_store_stshh<mode>): New pattern for intrinsic.
>       * config/aarch64/iterators.md: New UNSPEC.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/aarch64/atomic_store_with_stshh.c.c: New test.
> ---
>  gcc/config/aarch64/aarch64-builtins.cc        | 125 ++++++++++++
>  gcc/config/aarch64/aarch64-c.cc               |   1 +
>  gcc/config/aarch64/aarch64.h                  |   3 +
>  gcc/config/aarch64/arm_acle.h                 |  22 +++
>  gcc/config/aarch64/atomics.md                 |  24 +++
>  gcc/config/aarch64/iterators.md               |   1 +
>  .../aarch64/atomic_store_with_stshh.c         | 185 ++++++++++++++++++
>  7 files changed, 361 insertions(+)
>  create mode 100644
> gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c
> 
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc
> b/gcc/config/aarch64/aarch64-builtins.cc
> index 74388963a3d..6ba2c9981fe 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -901,6 +901,13 @@ enum aarch64_builtins
>    AARCH64_BUILTIN_GCSPR,
>    AARCH64_BUILTIN_GCSPOPM,
>    AARCH64_BUILTIN_GCSSS,
> +  /* Armv9.6-A builtins.  */
> +  AARCH64_BUILTIN_STSHH_QI,
> +  AARCH64_BUILTIN_STSHH_HI,
> +  AARCH64_BUILTIN_STSHH_SI,
> +  AARCH64_BUILTIN_STSHH_DI,
> +  AARCH64_BUILTIN_STSHH_SF,
> +  AARCH64_BUILTIN_STSHH_DF,
>    AARCH64_BUILTIN_MAX
>  };
> 
> @@ -2466,6 +2473,62 @@ aarch64_init_gcs_builtins (void)
>                                  AARCH64_BUILTIN_GCSSS);
>  }
> 
> +/* Add builtins for FEAT_PCDPHINT.  */
> +
> +static void
> +aarch64_init_pcdphint_builtins (void)
> +{
> +  tree ftype;
> +
> +  ftype = build_function_type_list (void_type_node, ptr_type_node,
> +                                 unsigned_char_type_node,
> +                                 unsigned_type_node,
> +                                 unsigned_type_node, NULL_TREE);
> +  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_QI]
> +    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_qi", ftype,
> +                                AARCH64_BUILTIN_STSHH_QI);
> +
> +  ftype = build_function_type_list (void_type_node, ptr_type_node,
> +                                 short_unsigned_type_node,
> +                                 unsigned_type_node,
> +                                 unsigned_type_node, NULL_TREE);
> +  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_HI]
> +    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_hi", ftype,
> +                                AARCH64_BUILTIN_STSHH_HI);
> +
> +  ftype = build_function_type_list (void_type_node, ptr_type_node,
> +                                 unsigned_type_node,
> +                                 unsigned_type_node,
> +                                 unsigned_type_node, NULL_TREE);
> +  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_SI]
> +    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_si", ftype,
> +                                AARCH64_BUILTIN_STSHH_SI);
> +
> +  ftype = build_function_type_list (void_type_node, ptr_type_node,
> +                                 long_long_unsigned_type_node,
> +                                 unsigned_type_node,
> +                                 unsigned_type_node, NULL_TREE);
> +  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_DI]
> +    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_di", ftype,
> +                                AARCH64_BUILTIN_STSHH_DI);
> +
> +  ftype = build_function_type_list (void_type_node, ptr_type_node,
> +                                 float_type_node,
> +                                 unsigned_type_node,
> +                                 unsigned_type_node, NULL_TREE);
> +  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_SF]
> +    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_sf", ftype,
> +                                AARCH64_BUILTIN_STSHH_SF);
> +
> +  ftype = build_function_type_list (void_type_node, ptr_type_node,
> +                                 double_type_node,
> +                                 unsigned_type_node,
> +                                 unsigned_type_node, NULL_TREE);
> +  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_DF]
> +    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_df", ftype,
> +                                AARCH64_BUILTIN_STSHH_DF);
> +}
> +
>  /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group.  */
> 
>  void
> @@ -2514,6 +2577,7 @@ aarch64_general_init_builtins (void)
>                                  AARCH64_BUILTIN_CHKFEAT);
> 
>    aarch64_init_gcs_builtins ();
> +  aarch64_init_pcdphint_builtins ();
> 
>    if (in_lto_p)
>      handle_arm_acle_h ();
> @@ -3968,6 +4032,58 @@ aarch64_expand_tbl_tbx (vec<expand_operand>
> &ops, int unspec)
>    return result;
>  }
> 
> +void
> +aarch64_expand_stshh_builtin (tree exp, int fcode)
> +{
> +  machine_mode mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp,
> 1)));
> +  rtx val = expand_normal (CALL_EXPR_ARG (exp, 1));
> +  rtx mem_order = expand_normal (CALL_EXPR_ARG (exp, 2));
> +  rtx ret = expand_normal (CALL_EXPR_ARG (exp, 3));
> +
> +  require_const_argument (exp, 3, 0, 2);
> +  require_const_argument (exp, 2, 0, 6);
> +  if (seen_error ())
> +    return;
> +
> +  switch (fcode)
> +    {
> +      case AARCH64_BUILTIN_STSHH_SF:
> +     {
> +       val = force_lowpart_subreg (SImode, val, SFmode);
> +       mode = SImode;
> +       break;
> +     }
> +      case AARCH64_BUILTIN_STSHH_DF:
> +     {
> +       val = force_lowpart_subreg (DImode, val, DFmode);
> +       mode = DImode;
> +       break;
> +     }
> +      default:
> +     {
> +       if (CONST_INT_P (val) && INTVAL (val) == 0)
> +         val = const0_rtx;

This is odd: it's saying that if val is zero, set it to zero?

> +       else if (!register_operand (val, mode))
> +         val = force_reg (mode, val);
> +       break;

Perhaps you wanted the following instead?

if (val != CONST0_RTX (mode))
  val = force_reg (mode, val);

force_reg will be a NOP if it's already a register of that mode.

> +     }
> +    }
> +
> +  rtx addr = expand_normal (CALL_EXPR_ARG (exp, 0));
> +  addr = convert_memory_address (Pmode, addr);
> +  rtx mem  = gen_rtx_MEM (mode, addr);
> +  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
> +
> +  expand_operand ops[4];
> +  enum insn_code icode = code_for_aarch64_atomic_store_stshh (mode);
> +  create_input_operand (&ops[0], mem, mode);

Should this be create_output_operand? It's a SET destination.

Thanks,
Tamar

> +  create_input_operand (&ops[1], val, mode);
> +  create_input_operand (&ops[2], mem_order, SImode);
> +  create_input_operand (&ops[3], ret, SImode);
> +
> +  expand_insn (icode, 4, ops);
> +}
> +
>  /* Expand CALL_EXPR EXP, given that it is a call to the function described
>     by BUILTIN_DATA, and return the function's return value.  Put the result
>     in TARGET if convenient.  */
> @@ -4458,6 +4574,15 @@ aarch64_general_expand_builtin (unsigned int
> fcode, tree exp, rtx target,
>      case AARCH64_BUILTIN_GCSPOPM:
>      case AARCH64_BUILTIN_GCSSS:
>        return aarch64_expand_gcs_builtin (exp, target, fcode, ignore);
> +
> +    case AARCH64_BUILTIN_STSHH_QI:
> +    case AARCH64_BUILTIN_STSHH_HI:
> +    case AARCH64_BUILTIN_STSHH_SI:
> +    case AARCH64_BUILTIN_STSHH_DI:
> +    case AARCH64_BUILTIN_STSHH_SF:
> +    case AARCH64_BUILTIN_STSHH_DF:
> +      aarch64_expand_stshh_builtin (exp, fcode);
> +      return target;
>      }
> 
>    if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <=
> AARCH64_SIMD_BUILTIN_MAX)
> diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-
> c.cc
> index ee539531d36..41df1e83888 100644
> --- a/gcc/config/aarch64/aarch64-c.cc
> +++ b/gcc/config/aarch64/aarch64-c.cc
> @@ -307,6 +307,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
>    aarch64_def_or_undef (AARCH64_HAVE_ISA (SME2p1),
>                       "__ARM_FEATURE_SME2p1", pfile);
>    aarch64_def_or_undef (TARGET_FAMINMAX,
> "__ARM_FEATURE_FAMINMAX", pfile);
> +  aarch64_def_or_undef (TARGET_PCDPHINT,
> "__ARM_FEATURE_PCDPHINT", pfile);
> 
>    // Function multi-versioning defines
>    aarch64_def_or_undef (targetm.has_ifunc_p (),
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 2b7d266de10..03802f07e1c 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -407,6 +407,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE
> ATTRIBUTE_UNUSED
>     but are incompatible with -mtrack-speculation. */
>  #define TARGET_CMPBR (AARCH64_HAVE_ISA (CMPBR) &&
> !aarch64_track_speculation)
> 
> +/* PCDPHINT instructions are enabled through +pcdphint.  */
> +#define TARGET_PCDPHINT AARCH64_HAVE_ISA (PCDPHINT)
> +
>  /* Make sure this is always defined so we don't have to check for ifdefs
>     but rather use normal ifs.  */
>  #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
> index 5c601b845fa..e01c057b8cb 100644
> --- a/gcc/config/aarch64/arm_acle.h
> +++ b/gcc/config/aarch64/arm_acle.h
> @@ -132,6 +132,28 @@ __sqrtf (float __x)
>    return __builtin_aarch64_sqrtsf (__x);
>  }
> 
> +#define __atomic_store_with_stshh(addr, value, memory_order, ret)    \
> +({                                                                   \
> +  __auto_type ptr = (addr);                                          \
> +  typedef __typeof__ (*ptr) ptr_type;                                        
> \
> +  _Generic ((*ptr),                                                  \
> +    char:              __builtin_aarch64_stshh_qi,   \
> +    unsigned char:     __builtin_aarch64_stshh_qi,   \
> +    signed char:       __builtin_aarch64_stshh_qi,   \
> +    unsigned short:       __builtin_aarch64_stshh_hi,        \
> +    short:             __builtin_aarch64_stshh_hi,   \
> +    unsigned int:      __builtin_aarch64_stshh_si,   \
> +    int:               __builtin_aarch64_stshh_si,   \
> +    unsigned long:     __builtin_aarch64_stshh_di,   \
> +    long:              __builtin_aarch64_stshh_di,   \
> +    unsigned long long:   __builtin_aarch64_stshh_di,        \
> +    long long:                 __builtin_aarch64_stshh_di,   \
> +    float:             __builtin_aarch64_stshh_sf,   \
> +    double:            __builtin_aarch64_stshh_df,   \
> +    default:           __builtin_aarch64_stshh_di    \
> +  )((ptr), (ptr_type)(value), (memory_order), (ret));        \
> +})
> +
>  #pragma GCC push_options
>  #pragma GCC target ("+nothing+jscvt")
>  __extension__ extern __inline int32_t
> diff --git a/gcc/config/aarch64/atomics.md
> b/gcc/config/aarch64/atomics.md
> index e3aa6773fb6..c9534d43c0f 100644
> --- a/gcc/config/aarch64/atomics.md
> +++ b/gcc/config/aarch64/atomics.md
> @@ -751,6 +751,30 @@
>    [(set_attr "arch" "*,rcpc8_4")]
>  )
> 
> +(define_insn "@aarch64_atomic_store_stshh<mode>"
> +  [(set (match_operand:ALLI 0 "aarch64_rcpc_memory_operand" "=Q,Ust")
> +    (unspec_volatile:ALLI
> +       [(match_operand:ALLI 1 "aarch64_reg_or_zero" "rZ,rZ")
> +       (match_operand:SI 2 "const_int_operand")                      ;;
> model
> +       (match_operand:SI 3 "const_int_operand")]             ;; ret_policy
> +      UNSPECV_STSHH))]
> +  ""
> +  {
> +    if (INTVAL (operands[3]) == 0)
> +      output_asm_insn ("stshh\tkeep", operands);
> +    else
> +      output_asm_insn ("stshh\tstrm", operands);
> +    enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
> +    if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire
> (model))
> +      return "str<atomic_sfx>\t%<w>1, %0";
> +    else if (which_alternative == 0)
> +      return "stlr<atomic_sfx>\t%<w>1, %0";
> +    else
> +      return "stlur<atomic_sfx>\t%<w>1, %0";
> +  }
> +  [(set_attr "arch" "*,rcpc8_4")]
> +)
> +
>  (define_insn "@aarch64_load_exclusive<mode>"
>    [(set (match_operand:SI 0 "register_operand" "=r")
>      (zero_extend:SI
> diff --git a/gcc/config/aarch64/iterators.md
> b/gcc/config/aarch64/iterators.md
> index 569c0876fab..588c89c8a17 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1336,6 +1336,7 @@
>      UNSPECV_LDA                      ; Represent an atomic load or load-
> acquire.
>      UNSPECV_LDAP             ; Represent an atomic acquire load with RCpc
> semantics.
>      UNSPECV_STL                      ; Represent an atomic store or store-
> release.
> +    UNSPECV_STSHH            ; Represent an atomic store with an stshh
> hint.
>      UNSPECV_ATOMIC_CMPSW     ; Represent an atomic compare swap.
>      UNSPECV_ATOMIC_EXCHG     ; Represent an atomic exchange.
>      UNSPECV_ATOMIC_CAS               ; Represent an atomic CAS.
> diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c
> b/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c
> new file mode 100644
> index 00000000000..9be42574b9c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c
> @@ -0,0 +1,185 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=armv8-a -save-temps" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_acle.h>
> +
> +/*
> +** testFun1:
> +** ...
> +**   stshh   keep
> +**   strb    w[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun1 ()
> +{
> +  char item1 = 0;
> +  char* ptr1 = &item1;
> +  char test1 = 1;
> +
> +  __atomic_store_with_stshh (ptr1, test1, __ATOMIC_RELAXED, 0);
> +}
> +
> +/*
> +** testFun2:
> +** ...
> +**   stshh   keep
> +**   stlrh   w[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun2 ()
> +{
> +  short item2 = 10;
> +  short* ptr2 = &item2;
> +  short test2 = 11;
> +  __atomic_store_with_stshh (ptr2, test2, __ATOMIC_RELEASE, 0);
> +}
> +
> +/*
> +** testFun3:
> +** ...
> +**   stshh   strm
> +**   stlr    w[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun3 ()
> +{
> +  unsigned int item3 = 10;
> +  unsigned int* ptr3 = &item3;
> +  unsigned int test3 = 11;
> +  __atomic_store_with_stshh (ptr3, test3, __ATOMIC_SEQ_CST, 1);
> +}
> +
> +/*
> +** testFun4:
> +** ...
> +**   stshh   strm
> +**   str     x[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun4 ()
> +{
> +  long item4 = 10;
> +  long* ptr4 = &item4;
> +  long test4 = 11;
> +  __atomic_store_with_stshh (ptr4, test4, __ATOMIC_RELAXED, 1);
> +}
> +
> +/*
> +** testFun5:
> +** ...
> +**   stshh   keep
> +**   stlr    x[0-9]+, \[sp\]
> +** ...
> +*/
> +void
> +testFun5 ()
> +{
> +  long item5 = 10;
> +  long* ptr5 = &item5;
> +  long test5item = 11;
> +  long* test5 = &test5item;
> +  __atomic_store_with_stshh (ptr5, test5, __ATOMIC_SEQ_CST, 0);
> +}
> +
> +/*
> +** testFun6:
> +** ...
> +**   stshh   keep
> +**   stlr    w[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun6 ()
> +{
> +  float item6 = 10;
> +  float* ptr6 = &item6;
> +  float test6 = 11;
> +  __atomic_store_with_stshh (ptr6, test6, __ATOMIC_SEQ_CST, 0);
> +}
> +
> +/*
> +** testFun7:
> +** ...
> +**   stshh   strm
> +**   str     x[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun7 ()
> +{
> +  double item7 = 10;
> +  double* ptr7 = &item7;
> +  double test7 = 11;
> +  __atomic_store_with_stshh (ptr7, test7, __ATOMIC_RELAXED, 1);
> +}
> +
> +/*
> +** testFun8:
> +** ...
> +**   stshh   keep
> +**   strb    w[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun8 ()
> +{
> +  char item8 = 0;
> +  char* ptr8 = &item8;
> +  long test8 = 1;
> +
> +  __atomic_store_with_stshh (ptr8, test8, __ATOMIC_RELAXED, 0);
> +}
> +
> +/*
> +** testFun9:
> +** ...
> +**   stshh   strm
> +**   str     w[0-9]+, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun9 ()
> +{
> +  int item9 = 0;
> +  int* ptr9 = &item9;
> +  float test9 = 1;
> +
> +  __atomic_store_with_stshh (ptr9, test9, __ATOMIC_RELAXED, 1);
> +}
> +
> +/*
> +** testFun10:
> +** ...
> +**   add     (x[0-9]+), \1, 1
> +**   mov     (w[0-9]+), 7
> +**   stshh   strm
> +**   strb    \2, \[\1\]
> +** ...
> +*/
> +static char buf[8];
> +void
> +testFun10 (void)
> +{
> +  __atomic_store_with_stshh((buf + 1), (char)7, __ATOMIC_RELAXED, 1);
> +}
> +
> +/*
> +** testFun11:
> +** ...
> +**   stshh   strm
> +**   str     wzr, \[x[0-9]+\]
> +** ...
> +*/
> +void
> +testFun11 ()
> +{
> +  int item11 = 10;
> +  int* ptr11 = &item11;
> +
> +  __atomic_store_with_stshh (ptr11, 0, __ATOMIC_RELAXED, 1);
> +}
> --
> 2.34.1

Reply via email to