Re: [PATCH 3/6] arm: [MVE intrinsics] Add support for contiguous loads and stores

2023-11-23 Thread Christophe Lyon
Hi!

On Thu, 23 Nov 2023 at 14:29, Jan-Benedict Glaw  wrote:
>
> On Thu, 2023-11-16 15:26:14 +, Christophe Lyon 
>  wrote:
> > diff --git a/gcc/config/arm/arm-mve-builtins-functions.h 
> > b/gcc/config/arm/arm-mve-builtins-functions.h
> > index eba1f071af0..6d234a2dd7c 100644
> > --- a/gcc/config/arm/arm-mve-builtins-functions.h
> > +++ b/gcc/config/arm/arm-mve-builtins-functions.h
> > @@ -966,6 +966,62 @@ public:
> [...]
>
> > +class full_width_access : public multi_vector_function
> > +{
> > +public:
> > +  CONSTEXPR full_width_access (unsigned int vectors_per_tuple = 1)
> > +: multi_vector_function (vectors_per_tuple) {}
> > +
> > +  tree
> > +  memory_scalar_type (const function_instance &fi) const override
> > +  {
> > +return fi.scalar_type (0);
> > +  }
> > +
> > +  machine_mode
> > +  memory_vector_mode (const function_instance &fi) const override
> > +  {
> > +machine_mode mode = fi.vector_mode (0);
> > +/* Vectors of floating-point are managed in memory as vectors of
> > +   integers.  */
> > +switch (mode)
> > +  {
> > +  case E_V4SFmode:
> > + mode = E_V4SImode;
> > + break;
> > +  case E_V8HFmode:
> > + mode = E_V8HImode;
> > + break;
> > +  }
>
> This introduces warnings about many enum values not being handled, so
> a default would be good I think. (I do automated builds with
> --enable-werror-always, see eg.
> http://toolchain.lug-owl.de/laminar/log/gcc-arm-eabi/48)
>

Ha right, thanks for catching this.

Fixed by commit b9dbdefac626ba20222ca534b58f7e493d713b9a

Christophe

> MfG, JBG
>
> --


Re: [PATCH 3/6] arm: [MVE intrinsics] Add support for contiguous loads and stores

2023-11-23 Thread Jan-Benedict Glaw
On Thu, 2023-11-16 15:26:14 +, Christophe Lyon  
wrote:
> diff --git a/gcc/config/arm/arm-mve-builtins-functions.h 
> b/gcc/config/arm/arm-mve-builtins-functions.h
> index eba1f071af0..6d234a2dd7c 100644
> --- a/gcc/config/arm/arm-mve-builtins-functions.h
> +++ b/gcc/config/arm/arm-mve-builtins-functions.h
> @@ -966,6 +966,62 @@ public:
[...]

> +class full_width_access : public multi_vector_function
> +{
> +public:
> +  CONSTEXPR full_width_access (unsigned int vectors_per_tuple = 1)
> +: multi_vector_function (vectors_per_tuple) {}
> +
> +  tree
> +  memory_scalar_type (const function_instance &fi) const override
> +  {
> +return fi.scalar_type (0);
> +  }
> +
> +  machine_mode
> +  memory_vector_mode (const function_instance &fi) const override
> +  {
> +machine_mode mode = fi.vector_mode (0);
> +/* Vectors of floating-point are managed in memory as vectors of
> +   integers.  */
> +switch (mode)
> +  {
> +  case E_V4SFmode:
> + mode = E_V4SImode;
> + break;
> +  case E_V8HFmode:
> + mode = E_V8HImode;
> + break;
> +  }

This introduces warnings about many enum values not being handled, so
a default would be good I think. (I do automated builds with
--enable-werror-always, see eg.
http://toolchain.lug-owl.de/laminar/log/gcc-arm-eabi/48)

MfG, JBG

-- 


signature.asc
Description: PGP signature


RE: [PATCH 3/6] arm: [MVE intrinsics] Add support for contiguous loads and stores

2023-11-16 Thread Kyrylo Tkachov



> -----Original Message-----
> From: Christophe Lyon 
> Sent: Thursday, November 16, 2023 3:26 PM
> To: gcc-patches@gcc.gnu.org; Richard Sandiford
> ; Richard Earnshaw
> ; Kyrylo Tkachov 
> Cc: Christophe Lyon 
> Subject: [PATCH 3/6] arm: [MVE intrinsics] Add support for contiguous loads
> and stores
> 
> This patch adds base support for load/store intrinsics to the
> framework, starting with loads and stores for contiguous memory
> elements, without extension nor truncation.
> 
> Compared to the aarch64/SVE implementation, there's no support for
> gather/scatter loads/stores yet.  This will be added later as needed.
> 

Ok.
Thanks,
Kyrill

> 2023-11-16  Christophe Lyon  
> 
>   gcc/
>   * config/arm/arm-mve-builtins-functions.h (multi_vector_function)
>   (full_width_access): New classes.
>   * config/arm/arm-mve-builtins.cc
>   (find_type_suffix_for_scalar_type, infer_pointer_type)
>   (require_pointer_type, get_contiguous_base, add_mem_operand)
>   (add_fixed_operand, use_contiguous_load_insn)
>   (use_contiguous_store_insn): New.
>   * config/arm/arm-mve-builtins.h (memory_vector_mode)
>   (infer_pointer_type, require_pointer_type, get_contiguous_base)
>   (add_mem_operand)
>   (add_fixed_operand, use_contiguous_load_insn)
>   (use_contiguous_store_insn): New.
> ---
>  gcc/config/arm/arm-mve-builtins-functions.h |  56 ++
>  gcc/config/arm/arm-mve-builtins.cc  | 116 
>  gcc/config/arm/arm-mve-builtins.h   |  28 -
>  3 files changed, 199 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-functions.h
> b/gcc/config/arm/arm-mve-builtins-functions.h
> index eba1f071af0..6d234a2dd7c 100644
> --- a/gcc/config/arm/arm-mve-builtins-functions.h
> +++ b/gcc/config/arm/arm-mve-builtins-functions.h
> @@ -966,6 +966,62 @@ public:
>}
>  };
> 
> +/* A function_base that sometimes or always operates on tuples of
> +   vectors.  */
> +class multi_vector_function : public function_base
> +{
> +public:
> +  CONSTEXPR multi_vector_function (unsigned int vectors_per_tuple)
> +: m_vectors_per_tuple (vectors_per_tuple) {}
> +
> +  unsigned int
> +  vectors_per_tuple () const override
> +  {
> +return m_vectors_per_tuple;
> +  }
> +
> +  /* The number of vectors in a tuple, or 1 if the function only operates
> + on single vectors.  */
> +  unsigned int m_vectors_per_tuple;
> +};
> +
> +/* A function_base that loads or stores contiguous memory elements
> +   without extending or truncating them.  */
> +class full_width_access : public multi_vector_function
> +{
> +public:
> +  CONSTEXPR full_width_access (unsigned int vectors_per_tuple = 1)
> +: multi_vector_function (vectors_per_tuple) {}
> +
> +  tree
> +  memory_scalar_type (const function_instance &fi) const override
> +  {
> +return fi.scalar_type (0);
> +  }
> +
> +  machine_mode
> +  memory_vector_mode (const function_instance &fi) const override
> +  {
> +machine_mode mode = fi.vector_mode (0);
> +/* Vectors of floating-point are managed in memory as vectors of
> +   integers.  */
> +switch (mode)
> +  {
> +  case E_V4SFmode:
> + mode = E_V4SImode;
> + break;
> +  case E_V8HFmode:
> + mode = E_V8HImode;
> + break;
> +  }
> +
> +if (m_vectors_per_tuple != 1)
> +  mode = targetm.array_mode (mode, m_vectors_per_tuple).require ();
> +
> +return mode;
> +  }
> +};
> +
>  } /* end namespace arm_mve */
> 
>  /* Declare the global function base NAME, creating it from an instance
> diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-
> builtins.cc
> index 02dc8fa9b73..a265cb05553 100644
> --- a/gcc/config/arm/arm-mve-builtins.cc
> +++ b/gcc/config/arm/arm-mve-builtins.cc
> @@ -36,6 +36,7 @@
>  #include "fold-const.h"
>  #include "gimple.h"
>  #include "gimple-iterator.h"
> +#include "explow.h"
>  #include "emit-rtl.h"
>  #include "langhooks.h"
>  #include "stringpool.h"
> @@ -529,6 +530,22 @@ matches_type_p (const_tree model_type, const_tree
> candidate)
> && TYPE_MAIN_VARIANT (model_type) == TYPE_MAIN_VARIANT
> (candidate));
>  }
> 
> +/* If TYPE is a valid MVE element type, return the corresponding type
> +   suffix, otherwise return NUM_TYPE_SUFFIXES.  */
> +static type_suffix_index
> +find_type_suffix_for_scalar_type (const_tree type)
> +{
> +  /* A linear search should be OK here, since the code isn't hot and
> + the number of types is only small.  */
> +  for (unsign

[PATCH 3/6] arm: [MVE intrinsics] Add support for contiguous loads and stores

2023-11-16 Thread Christophe Lyon
This patch adds base support for load/store intrinsics to the
framework, starting with loads and stores for contiguous memory
elements, without extension nor truncation.

Compared to the aarch64/SVE implementation, there's no support for
gather/scatter loads/stores yet.  This will be added later as needed.

2023-11-16  Christophe Lyon  

gcc/
* config/arm/arm-mve-builtins-functions.h (multi_vector_function)
(full_width_access): New classes.
* config/arm/arm-mve-builtins.cc
(find_type_suffix_for_scalar_type, infer_pointer_type)
(require_pointer_type, get_contiguous_base, add_mem_operand)
(add_fixed_operand, use_contiguous_load_insn)
(use_contiguous_store_insn): New.
* config/arm/arm-mve-builtins.h (memory_vector_mode)
(infer_pointer_type, require_pointer_type, get_contiguous_base)
(add_mem_operand)
(add_fixed_operand, use_contiguous_load_insn)
(use_contiguous_store_insn): New.
---
 gcc/config/arm/arm-mve-builtins-functions.h |  56 ++
 gcc/config/arm/arm-mve-builtins.cc  | 116 
 gcc/config/arm/arm-mve-builtins.h   |  28 -
 3 files changed, 199 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm-mve-builtins-functions.h 
b/gcc/config/arm/arm-mve-builtins-functions.h
index eba1f071af0..6d234a2dd7c 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -966,6 +966,62 @@ public:
   }
 };
 
+/* A function_base that sometimes or always operates on tuples of
+   vectors.  */
+class multi_vector_function : public function_base
+{
+public:
+  CONSTEXPR multi_vector_function (unsigned int vectors_per_tuple)
+: m_vectors_per_tuple (vectors_per_tuple) {}
+
+  unsigned int
+  vectors_per_tuple () const override
+  {
+return m_vectors_per_tuple;
+  }
+
+  /* The number of vectors in a tuple, or 1 if the function only operates
+ on single vectors.  */
+  unsigned int m_vectors_per_tuple;
+};
+
+/* A function_base that loads or stores contiguous memory elements
+   without extending or truncating them.  */
+class full_width_access : public multi_vector_function
+{
+public:
+  CONSTEXPR full_width_access (unsigned int vectors_per_tuple = 1)
+: multi_vector_function (vectors_per_tuple) {}
+
+  tree
+  memory_scalar_type (const function_instance &fi) const override
+  {
+return fi.scalar_type (0);
+  }
+
+  machine_mode
+  memory_vector_mode (const function_instance &fi) const override
+  {
+machine_mode mode = fi.vector_mode (0);
+/* Vectors of floating-point are managed in memory as vectors of
+   integers.  */
+switch (mode)
+  {
+  case E_V4SFmode:
+   mode = E_V4SImode;
+   break;
+  case E_V8HFmode:
+   mode = E_V8HImode;
+   break;
+  }
+
+if (m_vectors_per_tuple != 1)
+  mode = targetm.array_mode (mode, m_vectors_per_tuple).require ();
+
+return mode;
+  }
+};
+
 } /* end namespace arm_mve */
 
 /* Declare the global function base NAME, creating it from an instance
diff --git a/gcc/config/arm/arm-mve-builtins.cc 
b/gcc/config/arm/arm-mve-builtins.cc
index 02dc8fa9b73..a265cb05553 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -36,6 +36,7 @@
 #include "fold-const.h"
 #include "gimple.h"
 #include "gimple-iterator.h"
+#include "explow.h"
 #include "emit-rtl.h"
 #include "langhooks.h"
 #include "stringpool.h"
@@ -529,6 +530,22 @@ matches_type_p (const_tree model_type, const_tree 
candidate)
  && TYPE_MAIN_VARIANT (model_type) == TYPE_MAIN_VARIANT (candidate));
 }
 
+/* If TYPE is a valid MVE element type, return the corresponding type
+   suffix, otherwise return NUM_TYPE_SUFFIXES.  */
+static type_suffix_index
+find_type_suffix_for_scalar_type (const_tree type)
+{
+  /* A linear search should be OK here, since the code isn't hot and
+ the number of types is only small.  */
+  for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i)
+  {
+   vector_type_index vector_i = type_suffixes[suffix_i].vector_type;
+   if (matches_type_p (scalar_types[vector_i], type))
+ return type_suffix_index (suffix_i);
+  }
+  return NUM_TYPE_SUFFIXES;
+}
+
 /* Report an error against LOCATION that the user has tried to use
a floating point function when the mve.fp extension is disabled.  */
 static void
@@ -1125,6 +1142,37 @@ function_resolver::resolve_to (mode_suffix_index mode,
   return res;
 }
 
+/* Require argument ARGNO to be a pointer to a scalar type that has a
+   corresponding type suffix.  Return that type suffix on success,
+   otherwise report an error and return NUM_TYPE_SUFFIXES.  */
+type_suffix_index
+function_resolver::infer_pointer_type (unsigned int argno)
+{
+  tree actual = get_argument_type (argno);
+  if (actual == error_mark_node)
+return NUM_TYPE_SUFFIXES;
+
+  if (TREE_CODE (actual) != POINTER_TYPE)
+{
+  error_at