Re: [PATCH V2] aarch64: Implement the ACLE instruction/data prefetch functions.

2023-11-30 Thread Richard Sandiford
Victor Do Nascimento  writes:
> Correct CV-qualification from being erroneously applied to the `addr'
> pointer, applying it instead to its pointer target, as specified by
> the ACLE standards.
>
> ---
>
> Implement the ACLE data and instruction prefetch functions[1] with the
> following signatures:
>
>   1. Data prefetch intrinsics:
>   
>   void __pldx (/*constant*/ unsigned int /*access_kind*/,
>                /*constant*/ unsigned int /*cache_level*/,
>                /*constant*/ unsigned int /*retention_policy*/,
>                void const volatile *addr);
>
>   void __pld (void const volatile *addr);
>
>   2. Instruction prefetch intrinsics:
>   ---
>   void __plix (/*constant*/ unsigned int /*cache_level*/,
>                /*constant*/ unsigned int /*retention_policy*/,
>                void const volatile *addr);
>
>   void __pli (void const volatile *addr);
>
> `__pldx' affords the programmer more fine-grained control over the
> data prefetch behaviour than the analogous GCC builtin
> `__builtin_prefetch', and allows access to the "SLC" cache level.
>
> While `__builtin_prefetch' chooses both cache-level and retention
> policy automatically via the optional `locality' parameter, `__pldx'
> expects 2 (mandatory) arguments to explicitly define the desired
> cache-level and retention policies.
>
> `__plix' on the other hand, generates a code prefetch instruction and
> so extends functionality on aarch64 targets beyond that which is
> exposed by `__builtin_prefetch'.
>
> `__pld' and `__pli' do prefetch of data and instructions,
> respectively, using default values for both cache-level and retention
> policies.
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
>
> [1] 
> https://arm-software.github.io/acle/main/acle.html#memory-prefetch-intrinsics
>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64-builtins.cc:
>   (AARCH64_PLD): New enum aarch64_builtins entry.
>   (AARCH64_PLDX): Likewise.
>   (AARCH64_PLI): Likewise.
>   (AARCH64_PLIX): Likewise.
>   (aarch64_init_prefetch_builtin): New.
>   (aarch64_general_init_builtins): Call prefetch init function.
>   (aarch64_expand_prefetch_builtin): New.
>   (aarch64_general_expand_builtin):  Add prefetch expansion.
>   * config/aarch64/aarch64.md (UNSPEC_PLDX): New.
>   (aarch64_pldx): New.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/aarch64/builtin_pld_pli.c: New.

Generally looks good, thanks, but some comments below:

> ---
>  gcc/config/aarch64/aarch64-builtins.cc| 161 ++
>  gcc/config/aarch64/aarch64.md |  12 ++
>  gcc/config/aarch64/arm_acle.h |  30 
>  .../gcc.target/aarch64/builtin_pldx.c |  90 ++
>  4 files changed, 293 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/builtin_pldx.c
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
> b/gcc/config/aarch64/aarch64-builtins.cc
> index 04f59fd9a54..27a4c87b300 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -808,6 +808,10 @@ enum aarch64_builtins
>AARCH64_RBIT,
>AARCH64_RBITL,
>AARCH64_RBITLL,
> +  AARCH64_PLD,
> +  AARCH64_PLDX,
> +  AARCH64_PLI,
> +  AARCH64_PLIX,
>AARCH64_BUILTIN_MAX
>  };
>  
> @@ -1798,6 +1802,34 @@ aarch64_init_rng_builtins (void)
>  AARCH64_BUILTIN_RNG_RNDRRS);
>  }
>  
> +/* Add builtins for data and instruction prefetch.  */
> +static void
> +aarch64_init_prefetch_builtin (void)
> +{
> +#define AARCH64_INIT_PREFETCH_BUILTIN(INDEX, N) \
> +  aarch64_builtin_decls[INDEX] = \
> +aarch64_general_add_builtin ("__builtin_aarch64_" N, ftype, INDEX)
> +
> +  tree ftype;
> +  tree cv_argtype;
> +  cv_argtype = build_qualified_type (void_type_node, TYPE_QUAL_CONST
> +  | TYPE_QUAL_VOLATILE);
> +  cv_argtype = build_pointer_type (cv_argtype);
> +
> +  ftype = build_function_type_list (void_type_node, cv_argtype, NULL);
> +  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLD, "pld");
> +  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLI, "pli");
> +
> +  ftype = build_function_type_list (void_type_node, unsigned_type_node,
> + unsigned_type_node, unsigned_type_node,
> + cv_argtype, NULL);
> +  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLDX, "pldx");
> +
> +  ftype = build_function_type_list (void_type_node, unsigned_type_node,
> + unsigned_type_node, cv_argtype, NULL);
> +  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLIX, "plix");
> +}
> +
>  /* Initialize the memory tagging extension (MTE) builtins.  */
>  struct
>  {
> @@ -2019,6 +2051,8 @@ aarch64_general_init_builtins (void)
>aarch64_init_rng_builtins ();
>aarch64_init_data_intrinsics ();
>  
> +  

[PATCH V2] aarch64: Implement the ACLE instruction/data prefetch functions.

2023-10-30 Thread Victor Do Nascimento
Correct CV-qualification from being erroneously applied to the `addr'
pointer, applying it instead to its pointer target, as specified by
the ACLE standards.

---

Implement the ACLE data and instruction prefetch functions[1] with the
following signatures:

  1. Data prefetch intrinsics:
  
  void __pldx (/*constant*/ unsigned int /*access_kind*/,
   /*constant*/ unsigned int /*cache_level*/,
   /*constant*/ unsigned int /*retention_policy*/,
   void const volatile *addr);

  void __pld (void const volatile *addr);

  2. Instruction prefetch intrinsics:
  ---
  void __plix (/*constant*/ unsigned int /*cache_level*/,
   /*constant*/ unsigned int /*retention_policy*/,
   void const volatile *addr);

  void __pli (void const volatile *addr);

`__pldx' affords the programmer more fine-grained control over the
data prefetch behaviour than the analogous GCC builtin
`__builtin_prefetch', and allows access to the "SLC" cache level.

While `__builtin_prefetch' chooses both cache-level and retention
policy automatically via the optional `locality' parameter, `__pldx'
expects 2 (mandatory) arguments to explicitly define the desired
cache-level and retention policies.

`__plix' on the other hand, generates a code prefetch instruction and
so extends functionality on aarch64 targets beyond that which is
exposed by `__builtin_prefetch'.

`__pld' and `__pli' do prefetch of data and instructions,
respectively, using default values for both cache-level and retention
policies.

Bootstrapped and tested on aarch64-none-linux-gnu.

[1] 
https://arm-software.github.io/acle/main/acle.html#memory-prefetch-intrinsics

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc:
(AARCH64_PLD): New enum aarch64_builtins entry.
(AARCH64_PLDX): Likewise.
(AARCH64_PLI): Likewise.
(AARCH64_PLIX): Likewise.
(aarch64_init_prefetch_builtin): New.
(aarch64_general_init_builtins): Call prefetch init function.
(aarch64_expand_prefetch_builtin): New.
(aarch64_general_expand_builtin):  Add prefetch expansion.
* config/aarch64/aarch64.md (UNSPEC_PLDX): New.
(aarch64_pldx): New.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/builtin_pld_pli.c: New.
---
 gcc/config/aarch64/aarch64-builtins.cc| 161 ++
 gcc/config/aarch64/aarch64.md |  12 ++
 gcc/config/aarch64/arm_acle.h |  30 
 .../gcc.target/aarch64/builtin_pldx.c |  90 ++
 4 files changed, 293 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/builtin_pldx.c

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 04f59fd9a54..27a4c87b300 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -808,6 +808,10 @@ enum aarch64_builtins
   AARCH64_RBIT,
   AARCH64_RBITL,
   AARCH64_RBITLL,
+  AARCH64_PLD,
+  AARCH64_PLDX,
+  AARCH64_PLI,
+  AARCH64_PLIX,
   AARCH64_BUILTIN_MAX
 };
 
@@ -1798,6 +1802,34 @@ aarch64_init_rng_builtins (void)
   AARCH64_BUILTIN_RNG_RNDRRS);
 }
 
+/* Add builtins for data and instruction prefetch.  */
+static void
+aarch64_init_prefetch_builtin (void)
+{
+#define AARCH64_INIT_PREFETCH_BUILTIN(INDEX, N) \
+  aarch64_builtin_decls[INDEX] =   \
+aarch64_general_add_builtin ("__builtin_aarch64_" N, ftype, INDEX)
+
+  tree ftype;
+  tree cv_argtype;
+  cv_argtype = build_qualified_type (void_type_node, TYPE_QUAL_CONST
+| TYPE_QUAL_VOLATILE);
+  cv_argtype = build_pointer_type (cv_argtype);
+
+  ftype = build_function_type_list (void_type_node, cv_argtype, NULL);
+  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLD, "pld");
+  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLI, "pli");
+
+  ftype = build_function_type_list (void_type_node, unsigned_type_node,
+   unsigned_type_node, unsigned_type_node,
+   cv_argtype, NULL);
+  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLDX, "pldx");
+
+  ftype = build_function_type_list (void_type_node, unsigned_type_node,
+   unsigned_type_node, cv_argtype, NULL);
+  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLIX, "plix");
+}
+
 /* Initialize the memory tagging extension (MTE) builtins.  */
 struct
 {
@@ -2019,6 +2051,8 @@ aarch64_general_init_builtins (void)
   aarch64_init_rng_builtins ();
   aarch64_init_data_intrinsics ();
 
+  aarch64_init_prefetch_builtin ();
+
   tree ftype_jcvt
 = build_function_type_list (intSI_type_node, double_type_node, NULL);
   aarch64_builtin_decls[AARCH64_JSCVT]
@@ -2599,6 +2633,127 @@ aarch64_expand_rng_builtin (tree exp, rtx target, int 
fcode, int ignore)
   return target;
 }
 
+/* Expand a