Hi all,

This patch adds support for the aarch64 range prefetch intrinsic.

Bootstrapped and regression tested on aarch64-linux-gnu.

Okay for master (in stage 1 maybe?)

KR,
Alfie

-- >8 --

Also updates require_const_argument to always return a value in range.

gcc/ChangeLog:

        * config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
        Add AARCH64_PREFETCH_PLD_RANGE and AARCH64_PREFETCH_PLDX_RANGE.
        (aarch64_init_prefetch_builtins): Add initialization of
        __pld_range and __pldx_range.
        (require_const_argument): Update to return the minval if value
        is out of range.
        (aarch64_expand_prefetch_range_builtin): New function.
        (aarch64_general_expand_builtin): Add support for
        AARCH64_PREFETCH_PLD_RANGE and AARCH64_PREFETCH_PLDX_RANGE.
        * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Add
        __ARM_PREFETCH_RANGE macro.
        * config/aarch64/aarch64.md (unspec): Add UNSPEC_PLDX_RANGE and
        UNSPEC_PLD_RANGE.
        (aarch64_rprfm): New instruction.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/acle/rprfm.c: New test.
        * gcc.target/aarch64/acle/rprfm_error.c: New test.
---
 gcc/config/aarch64/aarch64-builtins.cc        | 128 +++++++++++++++++-
 gcc/config/aarch64/aarch64-c.cc               |   1 +
 gcc/config/aarch64/aarch64.md                 |  14 ++
 gcc/testsuite/gcc.target/aarch64/acle/rprfm.c | 107 +++++++++++++++
 .../gcc.target/aarch64/acle/rprfm_error.c     |  31 +++++
 5 files changed, 276 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rprfm.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rprfm_error.c

diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index dd74cf06ef2..39658ffab11 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -894,6 +894,8 @@ enum aarch64_builtins
   AARCH64_WSR128,
   AARCH64_PREFETCH_PLD,
   AARCH64_PREFETCH_PLDX,
+  AARCH64_PREFETCH_PLD_RANGE,
+  AARCH64_PREFETCH_PLDX_RANGE,
   AARCH64_PREFETCH_PLI,
   AARCH64_PREFETCH_PLIX,
   AARCH64_PREFETCH_PLDIR,
@@ -2249,6 +2251,18 @@ aarch64_init_prefetch_builtins (void)
 
   ftype = build_function_type_list (void_type_node, cv_argtype, NULL_TREE);
   AARCH64_INIT_PREFETCH_BUILTINS_DECL ("__pldir", PLDIR);
+
+  ftype = build_function_type_list (void_type_node, unsigned_type_node,
+                                   unsigned_type_node, integer_type_node,
+                                   unsigned_type_node, integer_type_node,
+                                   size_type_node, cv_argtype, NULL);
+  AARCH64_INIT_PREFETCH_BUILTINS_DECL ("__pldx_range", PLDX_RANGE);
+
+  ftype = build_function_type_list (void_type_node, unsigned_type_node,
+                                   unsigned_type_node,
+                                   long_long_unsigned_type_node, cv_argtype,
+                                   NULL);
+  AARCH64_INIT_PREFETCH_BUILTINS_DECL ("__pld_range", PLD_RANGE);
 }
 
 /* Initialize the memory tagging extension (MTE) builtins.  */
@@ -3650,9 +3664,13 @@ require_const_argument (tree exp, unsigned int argno, HOST_WIDE_INT minval,
   auto argval = wi::to_widest (arg);
 
   if (argval < minval || argval > maxval)
-    error_at (EXPR_LOCATION (exp),
-             "argument %d must be a constant immediate "
-             "in range [%wd,%wd]", argno + 1, minval, maxval);
+    {
+      error_at (EXPR_LOCATION (exp),
+               "argument %d must be a constant immediate "
+               "in range [%wd,%wd]",
+               argno + 1, minval, maxval);
+      return minval;
+    }
 
   HOST_WIDE_INT retval = argval.to_shwi ();
   return retval;
@@ -3723,8 +3741,104 @@ aarch64_expand_prefetch_builtin (tree exp, int fcode)
   maybe_expand_insn (CODE_FOR_aarch64_pldx, 2, ops);
 }
 
-/* Expand an expression EXP that calls a MEMTAG built-in FCODE
-   with result going to TARGET.  */
+/* Expand a range prefetch builtin EXP into an RPRFM instruction.  FCODE is
+   AARCH64_PREFETCH_PLD_RANGE, whose metadata argument is passed through
+   unchanged, or AARCH64_PREFETCH_PLDX_RANGE, whose constant length, count,
+   stride and reuse distance arguments are packed into the metadata here.
+   Both take the access kind and retention policy first and the address
+   last.  */
+void
+aarch64_expand_prefetch_range_builtin (tree exp, int fcode)
+{
+  char prfop[11];
+  class expand_operand ops[3];
+
+  static const char *kind_s[] = {"PLD", "PST"};
+  static const char *rettn_s[] = {"KEEP", "STRM"};
+
+  int argno = 0;
+
+  /* The upper bounds given to require_const_argument are exclusive.  */
+  int kind_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (kind_s));
+  int rettn_id
+    = require_const_argument (exp, argno++, 0, ARRAY_SIZE (rettn_s));
+
+  rtx metadata = NULL_RTX;
+
+  switch (fcode)
+    {
+    case AARCH64_PREFETCH_PLDX_RANGE:
+      {
+       /* length must be in [-2^21,2^21).  */
+       int length = require_const_argument (exp, argno++, -(1 << 21), 1 << 21);
+       gcc_assert (length >= -(1 << 21) && length < (1 << 21));
+
+       /* count must be in [1,2^16].  */
+       int count = require_const_argument (exp, argno++, 1, (1 << 16) + 1);
+       gcc_assert (count >= 1 && count <= (1 << 16));
+
+       /* stride must be in [-2^21,2^21).  */
+       int stride = require_const_argument (exp, argno++, -(1 << 21), 1 << 21);
+       gcc_assert (stride >= -(1 << 21) && stride < (1 << 21));
+
+       /* There are no requirements on reuse_distance other than that it is
+          a non-negative integer.  However it is meaningless for values
+          less than 2^15 or greater than 2^29.  */
+       uint64_t reuse_distance
+         = require_const_argument (exp, argno++, 0, HOST_WIDE_INT_MAX);
+
+       /* Signed 22-bit length and stride; count is encoded as count - 1.  */
+       uint64_t length_bits = ((uint64_t) length) & ((1 << 22) - 1);
+       uint64_t count_bits = ((uint64_t) count - 1) & ((1 << 16) - 1);
+       uint64_t stride_bits = ((uint64_t) stride) & ((1 << 22) - 1);
+
+       /* The 4-bit field holds the largest n such that 2^(30-n) bytes
+          (512MiB for 1 down to 32KiB for 15) >= reuse_distance; 0 means
+          distance unknown and covers 0 and anything above 512MiB.  */
+       uint64_t reuse_distance_bits = 0;
+       if (reuse_distance != 0 && reuse_distance <= (1ULL << 29))
+         reuse_distance_bits = 30 - MAX (ceil_log2 (reuse_distance), 15);
+
+       uint64_t metadata_val = length_bits
+                               | (count_bits << 22)
+                               | (stride_bits << 38)
+                               | (reuse_distance_bits << 60);
+
+       metadata = GEN_INT (metadata_val);
+       break;
+      }
+    case AARCH64_PREFETCH_PLD_RANGE:
+      {
+       tree metadata_arg = CALL_EXPR_ARG (exp, argno++);
+       metadata = copy_to_mode_reg (E_DImode, expand_normal (metadata_arg));
+       break;
+      }
+    default:
+      gcc_unreachable ();
+    }
+
+  /* The address is the final argument of both builtins.  */
+  rtx address = expand_expr (CALL_EXPR_ARG (exp, argno), NULL_RTX, Pmode,
+                            EXPAND_NORMAL);
+
+  /* Emit nothing once an error (e.g. a failed range check) was reported.  */
+  if (seen_error ())
+    return;
+
+  sprintf (prfop, "%s%s", kind_s[kind_id], rettn_s[rettn_id]);
+
+  rtx const_str = rtx_alloc (CONST_STRING);
+  PUT_CODE (const_str, CONST_STRING);
+  XSTR (const_str, 0) = ggc_strdup (prfop);
+
+  create_fixed_operand (&ops[0], const_str);
+  create_input_operand (&ops[1], metadata, E_DImode);
+  create_address_operand (&ops[2], address);
+  maybe_expand_insn (CODE_FOR_aarch64_rprfm, 3, ops);
+}
+
+/* Expand an expression EXP that calls a MEMTAG built-in FCODE
+   with result going to TARGET.  */
 static rtx
 aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
 {
@@ -4578,6 +4692,10 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
     case AARCH64_PREFETCH_PLDIR:
       aarch64_expand_pldir_builtin (exp);
       return target;
+    case AARCH64_PREFETCH_PLD_RANGE:
+    case AARCH64_PREFETCH_PLDX_RANGE:
+      aarch64_expand_prefetch_range_builtin (exp, fcode);
+      return target;
     case AARCH64_BUILTIN_CHKFEAT:
       {
        rtx x16_reg = gen_rtx_REG (DImode, R16_REGNUM);
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index f8be998da16..58fa761a9bb 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -310,6 +310,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
                        "__ARM_FEATURE_SME2p1", pfile);
   aarch64_def_or_undef (TARGET_FAMINMAX, "__ARM_FEATURE_FAMINMAX", pfile);
   aarch64_def_or_undef (TARGET_PCDPHINT, "__ARM_FEATURE_PCDPHINT", pfile);
+  builtin_define ("__ARM_PREFETCH_RANGE");
 
   // Function multi-versioning defines
   aarch64_def_or_undef (targetm.has_ifunc_p (),
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 70a64a6c0ed..4c5485cba1d 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -393,6 +393,8 @@ (define_c_enum "unspec" [
     UNSPEC_SYSREG_WTI
     UNSPEC_PLDX
     UNSPEC_PLDIR
+    UNSPEC_PLDX_RANGE
+    UNSPEC_PLD_RANGE
     ;; Represents an SVE-style lane index, in which the indexing applies
     ;; within the containing 128-bit block.
     UNSPEC_SVE_LANE_SELECT
@@ -1381,6 +1383,18 @@ (define_insn "aarch64_pldx"
   [(set_attr "type" "load_4")]
 )
 
+;; Range prefetch (RPRFM).  Operand 0 is the prefetch operation name,
+;; operand 1 the register holding the range metadata and operand 2 the
+;; register holding the base address.
+(define_insn "aarch64_rprfm"
+  [(unspec [(match_operand 0 "" "")
+           (match_operand:DI 1 "register_operand" "r")
+           (match_operand:DI 2 "register_operand" "r")] UNSPEC_PLDX_RANGE)]
+  ""
+  "rprfm\\t%0, %1, [%2]"
+  [(set_attr "type" "load_4")]
+)
+
 (define_insn "trap"
   [(trap_if (const_int 1) (const_int 8))]
   ""
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rprfm.c b/gcc/testsuite/gcc.target/aarch64/acle/rprfm.c
new file mode 100644
index 00000000000..02f0f11223b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/rprfm.c
@@ -0,0 +1,107 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a -O1 -fno-schedule-insns" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_acle.h>
+
+/* Access kind specifiers.  */
+#define PLD 0
+#define PST 1
+/* Retention policies.  */
+#define KEEP 0
+#define STRM 1
+
+/* This test is a bit awkward as we need to test the constants that get passed
+   into x1. This may be a bit fragile.  */
+
+/*
+** pldx_range:
+**...
+**     mov     x1, 0
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     rprfm   PSTKEEP, x1, \[x0\]
+**     rprfm   PLDSTRM, x1, \[x0\]
+**     rprfm   PSTSTRM, x1, \[x0\]
+**     mov     x1, 1
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, 4194303
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, 2097152
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, 2097151
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, 0
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, 4194304
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, 274873712640
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, 576460752303423488
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, 576460477425516544
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, -1152921504606846976
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, -2305843009213693952
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, -3458764513820540928
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, 2305843009213693952
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, 1152921504606846976
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     mov     x1, 0
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     rprfm   PLDKEEP, x1, \[x0\]
+**...
+*/
+void pldx_range (void *a) {
+  __pldx_range (PLD, KEEP, 0, 1, 0, 0, a);  /* All four operations.  */
+  __pldx_range (PST, KEEP, 0, 1, 0, 0, a);
+  __pldx_range (PLD, STRM, 0, 1, 0, 0, a);
+  __pldx_range (PST, STRM, 0, 1, 0, 0, a);
+  __pldx_range (PLD, KEEP, 1, 1, 0, 0, a);  /* length: bits [21:0].  */
+  __pldx_range (PLD, KEEP, -1, 1, 0, 0, a);
+  __pldx_range (PLD, KEEP, -(1<<21), 1, 0, 0, a);
+  __pldx_range (PLD, KEEP, (1<<21)-1, 1, 0, 0, a);
+  __pldx_range (PLD, KEEP, 0, 1, 0, 0, a);  /* count - 1: bits [37:22].  */
+  __pldx_range (PLD, KEEP, 0, 2, 0, 0, a);
+  __pldx_range (PLD, KEEP, 0, 65536, 0, 0, a);
+  __pldx_range (PLD, KEEP, 0, 1, -(1<<21), 0, a);  /* stride: bits [59:38].  */
+  __pldx_range (PLD, KEEP, 0, 1, (1<<21)-1, 0, a);
+  __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL, a);  /* reuse: bits [63:60] = 15.  */
+  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 15) - 1, a);
+  __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL << 15, a);
+  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 16) - 1, a);  /* 14.  */
+  __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL << 16, a);
+  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 16) + 1, a);  /* 13.  */
+  __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL << 28, a);  /* 2.  */
+  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 28) + 1, a);  /* 1.  */
+  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 29) - 1, a);
+  __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL << 29, a);
+  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 29) + 1, a);  /* 0 (unknown).  */
+  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 30), a);
+  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 31), a);
+}
+
+/*
+** pld_range:
+**...
+**     rprfm   PLDKEEP, x1, \[x0\]
+**     rprfm   PSTKEEP, x1, \[x0\]
+**     rprfm   PLDSTRM, x1, \[x0\]
+**     rprfm   PSTSTRM, x1, \[x0\]
+**...
+*/
+void pld_range (void *a, uint64_t m) {
+  __pld_range (PLD, KEEP, m, a);  /* m is used as the metadata unchanged.  */
+  __pld_range (PST, KEEP, m, a);
+  __pld_range (PLD, STRM, m, a);
+  __pld_range (PST, STRM, m, a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rprfm_error.c b/gcc/testsuite/gcc.target/aarch64/acle/rprfm_error.c
new file mode 100644
index 00000000000..6fe71aa9922
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/rprfm_error.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a -O2" } */
+
+#include <arm_acle.h>
+
+/* Access kind specifiers.  */
+#define PLD 0
+#define PST 1
+/* Retention policies.  */
+#define KEEP 0
+#define STRM 1
+
+void test (void *a, uint64_t m) {  /* Every call below must be diagnosed.  */
+  __pld_range (2, KEEP, m, a);                        /* { dg-error "argument 1 must be a constant immediate in range \\\[0,1\\\]" } */
+  __pld_range (-1, KEEP, m, a);                       /* { dg-error "argument 1 must be a constant immediate in range \\\[0,1\\\]" } */
+  __pld_range (PLD, 2, m, a);                         /* { dg-error "argument 2 must be a constant immediate in range \\\[0,1\\\]" } */
+  __pld_range (PLD, -1, m, a);                        /* { dg-error "argument 2 must be a constant immediate in range \\\[0,1\\\]" } */
+
+  __pldx_range (2, KEEP, 0, 1, 0, 0, a);              /* { dg-error "argument 1 must be a constant immediate in range \\\[0,1\\\]" } */
+  __pldx_range (PLD, 2, 0, 1, 0, 0, a);               /* { dg-error "argument 2 must be a constant immediate in range \\\[0,1\\\]" } */
+  __pldx_range (-1, KEEP, 0, 1, 0, 0, a);             /* { dg-error "argument 1 must be a constant immediate in range \\\[0,1\\\]" } */
+  __pldx_range (PLD, -1, 0, 1, 0, 0, a);              /* { dg-error "argument 2 must be a constant immediate in range \\\[0,1\\\]" } */
+
+  __pldx_range (PLD, KEEP, -(1<<21) - 1, 1, 0, 0, a); /* { dg-error "argument 3 must be a constant immediate in range \\\[-2097152,2097151\\\]" } */
+  __pldx_range (PLD, KEEP, (1<<21), 1, 0, 0, a);      /* { dg-error "argument 3 must be a constant immediate in range \\\[-2097152,2097151\\\]" } */
+  __pldx_range (PLD, KEEP, 0, 0, 0, 0, a);            /* { dg-error "argument 4 must be a constant immediate in range \\\[1,65536\\\]" } */
+  __pldx_range (PLD, KEEP, 0, (1<<16) + 1, 0, 0, a);  /* { dg-error "argument 4 must be a constant immediate in range \\\[1,65536\\\]" } */
+  __pldx_range (PLD, KEEP, 0, 1, -(1<<21)-1, 0, a);   /* { dg-error "argument 5 must be a constant immediate in range \\\[-2097152,2097151\\\]" } */
+  __pldx_range (PLD, KEEP, 0, 1, (1<<21), 0, a);      /* { dg-error "argument 5 must be a constant immediate in range \\\[-2097152,2097151\\\]" } */
+  __pldx_range (PLD, KEEP, 0, 1, 0, -1, a);     /* { dg-error "argument 6 must be a constant immediate in range \\\[0,9223372036854775806\\\]" } */
+}
-- 
2.34.1

Reply via email to