The new -msimd-memops option (enabled by default) preserves existing
behavior by allowing use of Advanced SIMD registers while expanding
memset/memcpy/memmove operations into inline instructions.

Disabling this option (-mno-simd-memops) prevents use of these registers.
This is intended for environments where the FPU may be disabled to reduce
the cost of saving/restoring the processor state, such as in interrupt
handlers.

Signed-off-by: Keith Packard <kei...@keithp.com>
---
 gcc/common/config/aarch64/aarch64-common.cc |  4 ++++
 gcc/config/aarch64/aarch64.cc               |  8 +++++---
 gcc/config/aarch64/aarch64.h                |  7 +++++++
 gcc/config/aarch64/aarch64.opt              |  4 ++++
 gcc/config/aarch64/aarch64.opt.urls         |  3 +++
 gcc/doc/invoke.texi                         | 10 +++++++++-
 6 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 1488697c6ce..b6b60b0fdfb 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -146,6 +146,10 @@ aarch64_handle_option (struct gcc_options *opts,
       opts->x_flag_aarch64_max_vectorization = val;
       return true;
 
+    case OPT_msimd_memops:
+      opts->x_aarch64_flag_simd_memops = val;
+      return true;
+
     default:
       return true;
     }
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index d30c9c75e42..19e6973a5e3 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -19906,6 +19906,8 @@ static const struct aarch64_attribute_info 
aarch64_attributes[] =
      OPT_moutline_atomics},
   { "max-vectorization", aarch64_attr_bool, false, NULL,
      OPT_mmax_vectorization},
+  { "simd-memops", aarch64_attr_bool, true, NULL,
+     OPT_msimd_memops},
   { NULL, aarch64_attr_custom, false, NULL, OPT____ }
 };
 
@@ -27788,7 +27790,7 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove)
   unsigned HOST_WIDE_INT size = UINTVAL (operands[2]);
 
   /* Set inline limits for memmove/memcpy.  MOPS has a separate threshold.  */
-  unsigned max_copy_size = TARGET_SIMD ? 256 : 128;
+  unsigned max_copy_size = TARGET_SIMD_MEMOPS ? 256 : 128;
   unsigned mops_threshold = is_memmove ? aarch64_mops_memmove_size_threshold
                                       : aarch64_mops_memcpy_size_threshold;
 
@@ -27805,7 +27807,7 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove)
      ??? Although it would be possible to use LDP/STP Qn in streaming mode
      (so using TARGET_BASE_SIMD instead of TARGET_SIMD), it isn't clear
      whether that would improve performance.  */
-  bool use_qregs = size > 24 && TARGET_SIMD;
+  bool use_qregs = size > 24 && TARGET_SIMD_MEMOPS;
 
   base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
   dst = adjust_automodify_address (dst, VOIDmode, base, 0);
@@ -27905,7 +27907,7 @@ aarch64_expand_setmem (rtx *operands)
   machine_mode mode = BLKmode, next_mode;
 
   /* Variable-sized or strict-align memset may use the MOPS expansion.  */
-  if (!CONST_INT_P (operands[1]) || !TARGET_SIMD
+  if (!CONST_INT_P (operands[1]) || !TARGET_SIMD_MEMOPS
       || (STRICT_ALIGNMENT && align < 16))
     return aarch64_expand_setmem_mops (operands);
 
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 096c853af7f..fc6fd6bf869 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -121,6 +121,13 @@
    of LSE instructions.  */
 #define TARGET_OUTLINE_ATOMICS (aarch64_flag_outline_atomics)
 
+#ifndef AARCH64_SIMD_MEMOPS_DEFAULT
+#define AARCH64_SIMD_MEMOPS_DEFAULT 1
+#endif
+
+/* Allow use of SIMD registers for memory copy and set expansions.  */
+#define TARGET_SIMD_MEMOPS (TARGET_SIMD && aarch64_flag_simd_memops)
+
 /* Align global data as an optimization.  */
 #define DATA_ALIGNMENT(EXP, ALIGN) aarch64_data_alignment (EXP, ALIGN)
 
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 9ca753e6a88..1d77d2048f2 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -352,6 +352,10 @@ moutline-atomics
 Target Var(aarch64_flag_outline_atomics) Init(2) Save
 Generate local calls to out-of-line atomic operations.
 
+msimd-memops
+Target Var(aarch64_flag_simd_memops) Init(AARCH64_SIMD_MEMOPS_DEFAULT) Save
+Allow use of SIMD registers in memory set/copy expansions.
+
 -param=aarch64-vect-compare-costs=
 Target Joined UInteger Var(aarch64_vect_compare_costs) Init(1) IntegerRange(0, 
1) Param
 When vectorizing, consider using multiple different approaches and use
diff --git a/gcc/config/aarch64/aarch64.opt.urls 
b/gcc/config/aarch64/aarch64.opt.urls
index 7ec14a94381..709fc86a6c1 100644
--- a/gcc/config/aarch64/aarch64.opt.urls
+++ b/gcc/config/aarch64/aarch64.opt.urls
@@ -92,6 +92,9 @@ 
UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-reg)
 mstack-protector-guard-offset=
 UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-offset)
 
+msimd-memops
+UrlSuffix(gcc/AArch64-Options.html#index-msimd-memops)
+
 Wexperimental-fmv-target
 UrlSuffix(gcc/AArch64-Options.html#index-Wexperimental-fmv-target)
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 00468a72ada..4d518c28049 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -835,7 +835,7 @@ Objective-C and Objective-C++ Dialects}.
 -moverride=@var{string}  -mverbose-cost-dump
 -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg}
 -mstack-protector-guard-offset=@var{offset} -mtrack-speculation
--moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion
+-moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion -msimd-memops
 -Wexperimental-fmv-target}
 
 @emph{Adapteva Epiphany Options} (@ref{Adapteva Epiphany Options})
@@ -22182,6 +22182,14 @@ For best performance it is highly recommended to use 
@option{-mcpu} or
 @option{-mtune} instead.  This parameter should only be used for code
 exploration.
 
+@item -msimd-memops
+@itemx -mno-simd-memops
+Enable or disable use of Advanced SIMD registers when expanding memory
+copy and memory set operations.  Use of these registers can improve
+performance and reduce instruction count for these operations.  This
+option is ignored unless Advanced SIMD registers are available.
+This option is on by default.
+
 @opindex march
 @item -march=@var{name}
 Specify the name of the target architecture and, optionally, one or
-- 
2.49.0

Reply via email to