From: Richard Ball <[email protected]>

This patch adds support for the atomic_store_with_stshh intrinsic
in aarch64. This intrinsic is part of FEAT_PCDPHINT.

gcc/ChangeLog:

        * config/aarch64/aarch64-builtins.cc
        (enum aarch64_builtins): Add new flags.
        (aarch64_init_pcdphint_builtins): Create new Builtin functions.
        (aarch64_general_init_builtins): Call init for PCDPHINT.
        (aarch64_expand_stshh_builtin): Expander for new intrinsic.
        (aarch64_general_expand_builtin): Call new expander.
        * config/aarch64/aarch64-c.cc
        (aarch64_update_cpp_builtins): New feature.
        * config/aarch64/aarch64-option-extensions.def (AARCH64_OPT_EXTENSION): 
Likewise.
        * config/aarch64/aarch64.h (TARGET_PCDPHINT): Likewise.
        * config/aarch64/arm_acle.h
        (__atomic_store_with_stshh): Generic to call builtins.
        * config/aarch64/atomics.md
        (@aarch64_atomic_store_stshh<mode>): New pattern for intrinsic.
        * config/aarch64/iterators.md: New UNSPEC.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/atomic_store_with_stshh.c.c: New test.
---
 gcc/config/aarch64/aarch64-builtins.cc        | 132 ++++++++++++++++++
 gcc/config/aarch64/aarch64-c.cc               |   1 +
 .../aarch64/aarch64-option-extensions.def     |   2 +
 gcc/config/aarch64/aarch64.h                  |   3 +
 gcc/config/aarch64/arm_acle.h                 |  31 ++++
 gcc/config/aarch64/atomics.md                 |  23 +++
 gcc/config/aarch64/iterators.md               |   1 +
 .../aarch64/atomic_store_with_stshh.c.c       | 119 ++++++++++++++++
 8 files changed, 312 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c.c

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 408099a50e8..1e682c00302 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -901,6 +901,14 @@ enum aarch64_builtins
   AARCH64_BUILTIN_GCSPR,
   AARCH64_BUILTIN_GCSPOPM,
   AARCH64_BUILTIN_GCSSS,
+  /* Armv9.6-A builtins.  */
+  AARCH64_BUILTIN_STSHH_QI,
+  AARCH64_BUILTIN_STSHH_HI,
+  AARCH64_BUILTIN_STSHH_SI,
+  AARCH64_BUILTIN_STSHH_DI,
+  AARCH64_BUILTIN_STSHH_PTR,
+  AARCH64_BUILTIN_STSHH_SF,
+  AARCH64_BUILTIN_STSHH_DF,
   AARCH64_BUILTIN_MAX
 };
 
@@ -2466,6 +2474,70 @@ aarch64_init_gcs_builtins (void)
                                   AARCH64_BUILTIN_GCSSS);
 }
 
+/* Add builtins for FEAT_PCDPHINT.  */
+
+static void
+aarch64_init_pcdphint_builtins (void)
+{
+  tree ftype;
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node,
+                                   unsigned_char_type_node,
+                                   unsigned_type_node,
+                                   unsigned_type_node, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_QI]
+    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_qi", ftype,
+                                  AARCH64_BUILTIN_STSHH_QI);
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node,
+                                   short_unsigned_type_node,
+                                   unsigned_type_node,
+                                   unsigned_type_node, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_HI]
+    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_hi", ftype,
+                                  AARCH64_BUILTIN_STSHH_HI);
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node,
+                                   unsigned_type_node,
+                                   unsigned_type_node,
+                                   unsigned_type_node, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_SI]
+    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_si", ftype,
+                                  AARCH64_BUILTIN_STSHH_SI);
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node,
+                                   long_long_unsigned_type_node,
+                                   unsigned_type_node,
+                                   unsigned_type_node, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_DI]
+    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_di", ftype,
+                                  AARCH64_BUILTIN_STSHH_DI);
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node,
+                                   ptr_type_node,
+                                   unsigned_type_node,
+                                   unsigned_type_node, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_PTR]
+    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_ptr", ftype,
+                                  AARCH64_BUILTIN_STSHH_PTR);
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node,
+                                   float_type_node,
+                                   unsigned_type_node,
+                                   unsigned_type_node, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_SF]
+    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_sf", ftype,
+                                  AARCH64_BUILTIN_STSHH_SF);
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node,
+                                   double_type_node,
+                                   unsigned_type_node,
+                                   unsigned_type_node, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_DF]
+    = aarch64_general_add_builtin ("__builtin_aarch64_stshh_df", ftype,
+                                  AARCH64_BUILTIN_STSHH_DF);
+}
+
 /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group.  */
 
 void
@@ -2514,6 +2586,7 @@ aarch64_general_init_builtins (void)
                                   AARCH64_BUILTIN_CHKFEAT);
 
   aarch64_init_gcs_builtins ();
+  aarch64_init_pcdphint_builtins ();
 
   if (in_lto_p)
     handle_arm_acle_h ();
@@ -3932,6 +4005,55 @@ aarch64_expand_tbl_tbx (vec<expand_operand> &ops, int 
unspec)
   return result;
 }
 
+void
+aarch64_expand_stshh_builtin (tree exp, int fcode)
+{
+  machine_mode mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, 1)));
+  rtx val = expand_normal (CALL_EXPR_ARG (exp, 1));
+  rtx mem_order = expand_normal (CALL_EXPR_ARG (exp, 2));
+  rtx ret = expand_normal (CALL_EXPR_ARG (exp, 3));
+
+  switch (fcode)
+    {
+      case AARCH64_BUILTIN_STSHH_SF:
+       {
+         rtx ival = gen_reg_rtx (SImode);
+         emit_move_insn (ival, gen_lowpart (SImode, val));
+         val = ival;
+         mode = SImode;
+         break;
+       }
+      case AARCH64_BUILTIN_STSHH_DF:
+       {
+         rtx ival = gen_reg_rtx (DImode);
+         emit_move_insn (ival, gen_lowpart (DImode, val));
+         val = ival;
+         mode = DImode;
+         break;
+       }
+      default:
+       {
+         if (!register_operand (val, mode))
+         val = force_reg (mode, val);
+         break;
+       }
+    }
+
+  rtx addr = expand_normal (CALL_EXPR_ARG (exp, 0));
+  addr = convert_memory_address (Pmode, addr);
+  rtx mem  = gen_rtx_MEM (mode, addr);
+  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
+
+  expand_operand ops[4];
+  enum insn_code icode = code_for_aarch64_atomic_store_stshh (mode);
+  create_input_operand (&ops[0], mem, mode);
+  create_input_operand (&ops[1], val, mode);
+  create_input_operand (&ops[2], mem_order, SImode);
+  create_input_operand (&ops[3], ret, SImode);
+
+  expand_insn (icode, 4, ops);
+}
+
 /* Expand CALL_EXPR EXP, given that it is a call to the function described
    by BUILTIN_DATA, and return the function's return value.  Put the result
    in TARGET if convenient.  */
@@ -4422,6 +4544,16 @@ aarch64_general_expand_builtin (unsigned int fcode, tree 
exp, rtx target,
     case AARCH64_BUILTIN_GCSPOPM:
     case AARCH64_BUILTIN_GCSSS:
       return aarch64_expand_gcs_builtin (exp, target, fcode, ignore);
+
+    case AARCH64_BUILTIN_STSHH_QI:
+    case AARCH64_BUILTIN_STSHH_HI:
+    case AARCH64_BUILTIN_STSHH_SI:
+    case AARCH64_BUILTIN_STSHH_DI:
+    case AARCH64_BUILTIN_STSHH_PTR:
+    case AARCH64_BUILTIN_STSHH_SF:
+    case AARCH64_BUILTIN_STSHH_DF:
+      aarch64_expand_stshh_builtin (exp, fcode);
+      return target;
     }
 
   if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index c3957c762ef..71ea66d1762 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -295,6 +295,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
   aarch64_def_or_undef (AARCH64_HAVE_ISA (SME2p1),
                        "__ARM_FEATURE_SME2p1", pfile);
   aarch64_def_or_undef (TARGET_FAMINMAX, "__ARM_FEATURE_FAMINMAX", pfile);
+  aarch64_def_or_undef (TARGET_PCDPHINT, "__ARM_FEATURE_PCDPHINT", pfile);
 
   // Function multi-versioning defines
   aarch64_def_or_undef (targetm.has_ifunc_p (),
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def
index 083515d890d..d8a618c7c5e 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -289,6 +289,8 @@ AARCH64_OPT_EXTENSION ("sme-lutv2", SME_LUTv2, (SME2), (), 
(), "sme-lutv2")
 
 AARCH64_OPT_EXTENSION("cpa", CPA, (), (), (), "")
 
+AARCH64_OPT_EXTENSION ("pcdphint", PCDPHINT, (), (), (), "")
+
 #undef AARCH64_OPT_FMV_EXTENSION
 #undef AARCH64_OPT_EXTENSION
 #undef AARCH64_FMV_FEATURE
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 2cd929d83f9..24597d49a61 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -392,6 +392,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
    but are incompatible with -mtrack-speculation. */
 #define TARGET_CMPBR (AARCH64_HAVE_ISA (CMPBR) && !aarch64_track_speculation)
 
+/* PCDPHINT instructions are enabled through +pcdphint.  */
+#define TARGET_PCDPHINT AARCH64_HAVE_ISA (PCDPHINT)
+
 /* Make sure this is always defined so we don't have to check for ifdefs
    but rather use normal ifs.  */
 #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index 507b6e72bcb..d2a5be0e8f7 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -132,6 +132,37 @@ __sqrtf (float __x)
   return __builtin_aarch64_sqrtsf (__x);
 }
 
+#define __atomic_store_with_stshh(dst, value, memory_order, ret)       \
+  _Generic ((value),                                                   \
+    char:                __builtin_aarch64_stshh_qi,   \
+    unsigned char:       __builtin_aarch64_stshh_qi,   \
+    signed char:         __builtin_aarch64_stshh_qi,   \
+    unsigned short:       __builtin_aarch64_stshh_hi,  \
+    short:               __builtin_aarch64_stshh_hi,   \
+    unsigned int:        __builtin_aarch64_stshh_si,   \
+    int:                 __builtin_aarch64_stshh_si,   \
+    unsigned long:       __builtin_aarch64_stshh_di,   \
+    long:                __builtin_aarch64_stshh_di,   \
+    unsigned long long:   __builtin_aarch64_stshh_di,  \
+    long long:           __builtin_aarch64_stshh_di,   \
+    float:               __builtin_aarch64_stshh_sf,   \
+    double:              __builtin_aarch64_stshh_df,   \
+    char*:               __builtin_aarch64_stshh_ptr,  \
+    unsigned char*:       __builtin_aarch64_stshh_ptr,  \
+    signed char*:        __builtin_aarch64_stshh_ptr,  \
+    unsigned short*:      __builtin_aarch64_stshh_ptr,  \
+    short*:              __builtin_aarch64_stshh_ptr,  \
+    unsigned int*:       __builtin_aarch64_stshh_ptr,  \
+    int*:                __builtin_aarch64_stshh_ptr,  \
+    unsigned long*:       __builtin_aarch64_stshh_ptr,  \
+    long*:               __builtin_aarch64_stshh_ptr,  \
+    unsigned long long*:  __builtin_aarch64_stshh_ptr,  \
+    long long*:                  __builtin_aarch64_stshh_ptr,  \
+    float*:              __builtin_aarch64_stshh_ptr,  \
+    double*:             __builtin_aarch64_stshh_ptr,  \
+    default:             __builtin_aarch64_stshh_di    \
+  )((dst), (value), (memory_order), (ret))
+
 #pragma GCC push_options
 #pragma GCC target ("+nothing+jscvt")
 __extension__ extern __inline int32_t
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index ea4a9367fc8..4a245bbebff 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -751,6 +751,29 @@
   [(set_attr "arch" "*,rcpc8_4")]
 )
 
+(define_insn "@aarch64_atomic_store_stshh<mode>"
+  [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "=Q,Ust")
+    (unspec_volatile:ALLI
+       [(match_operand:ALLI 1 "register_operand" "r,r")
+       (match_operand:SI 2 "const_int_operand")                        ;; model
+       (match_operand:SI 3 "const_int_operand")]               ;; ret_policy
+      UNSPECV_STSHH))]
+  ""
+  {
+    if (INTVAL (operands[3]) == 0)
+      output_asm_insn ("stshh keep", operands);
+    else
+      output_asm_insn ("stshh strm", operands);
+    enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+    if (is_mm_relaxed (model))
+      return "str<atomic_sfx>\t%<w>1, %0";
+    else if (which_alternative == 0)
+      return "stlr<atomic_sfx>\t%<w>1, %0";
+    else
+      return "stlur<atomic_sfx>\t%<w>1, %0";
+  }
+)
+
 (define_insn "@aarch64_load_exclusive<mode>"
   [(set (match_operand:SI 0 "register_operand" "=r")
     (zero_extend:SI
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 517b2808b5f..97b4ece1444 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1302,6 +1302,7 @@
     UNSPECV_LDA                        ; Represent an atomic load or 
load-acquire.
     UNSPECV_LDAP               ; Represent an atomic acquire load with RCpc 
semantics.
     UNSPECV_STL                        ; Represent an atomic store or 
store-release.
+    UNSPECV_STSHH              ; Represent an atomic store with an stshh hint.
     UNSPECV_ATOMIC_CMPSW       ; Represent an atomic compare swap.
     UNSPECV_ATOMIC_EXCHG       ; Represent an atomic exchange.
     UNSPECV_ATOMIC_CAS         ; Represent an atomic CAS.
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c.c 
b/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c.c
new file mode 100644
index 00000000000..e5857df22d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c.c
@@ -0,0 +1,119 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_acle.h>
+
+/*
+** testFun1:
+** ...
+**     stshh keep
+**     strb    w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+void
+testFun1 ()
+{
+  char item1 = 0;
+  char* ptr1 = &item1;
+  char test1 = 1;
+
+  __atomic_store_with_stshh (ptr1, test1, __ATOMIC_RELAXED, 0);
+}
+
+/*
+** testFun2:
+** ...
+**     stshh keep
+**     stlrh   w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+void
+testFun2 ()
+{
+  short item2 = 10;
+  short* ptr2 = &item2;
+  short test2 = 11;
+  __atomic_store_with_stshh (ptr2, test2, __ATOMIC_RELEASE, 0);
+}
+
+/*
+** testFun3:
+** ...
+**     stshh strm
+**     stlr    w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+void
+testFun3 ()
+{
+  unsigned int item3 = 10;
+  unsigned int* ptr3 = &item3;
+  unsigned int test3 = 11;
+  __atomic_store_with_stshh (ptr3, test3, __ATOMIC_SEQ_CST, 1);
+}
+
+/*
+** testFun4:
+** ...
+**     stshh strm
+**     str     x[0-9]+, \[x[0-9]+\]
+** ...
+*/
+void
+testFun4 ()
+{
+  long item4 = 10;
+  long* ptr4 = &item4;
+  long test4 = 11;
+  __atomic_store_with_stshh (ptr4, test4, __ATOMIC_RELAXED, 1);
+}
+
+/*
+** testFun5:
+** ...
+**     stshh keep
+**     stlr    x[0-9]+, \[sp\]
+** ...
+*/
+void
+testFun5 ()
+{
+  long item5 = 10;
+  long* ptr5 = &item5;
+  long test5item = 11;
+  long* test5 = &test5item;
+  __atomic_store_with_stshh (ptr5, test5, __ATOMIC_SEQ_CST, 0);
+}
+
+/*
+** testFun6:
+** ...
+**     stshh keep
+**     stlr    w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+void
+testFun6 ()
+{
+  float item6 = 10;
+  float* ptr6 = &item6;
+  float test6 = 11;
+  __atomic_store_with_stshh (ptr6, test6, __ATOMIC_SEQ_CST, 0);
+}
+
+/*
+** testFun7:
+** ...
+**     stshh strm
+**     str     x[0-9]+, \[x[0-9]+\]
+** ...
+*/
+void
+testFun7 ()
+{
+  double item7 = 10;
+  double* ptr7 = &item7;
+  double test7 = 11;
+  __atomic_store_with_stshh (ptr7, test7, __ATOMIC_RELAXED, 1);
+}
\ No newline at end of file
-- 
2.34.1

Reply via email to