The -msimd-memops option (enabled by default) preserves existing behavior by allowing use of Advanced SIMD registers when expanding memset/memcpy/memmove operations into inline instructions.
Disabling this option prevents use of these registers for environments where the FPU may be disabled to reduce the cost of saving/restoring the processor state, such as in interrupt handlers. Signed-off-by: Keith Packard <kei...@keithp.com> --- gcc/common/config/aarch64/aarch64-common.cc | 4 ++++ gcc/config/aarch64/aarch64.cc | 8 +++++--- gcc/config/aarch64/aarch64.h | 7 +++++++ gcc/config/aarch64/aarch64.opt | 4 ++++ gcc/config/aarch64/aarch64.opt.urls | 3 +++ gcc/doc/invoke.texi | 10 +++++++++- 6 files changed, 32 insertions(+), 4 deletions(-) diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc index 1488697c6ce..b6b60b0fdfb 100644 --- a/gcc/common/config/aarch64/aarch64-common.cc +++ b/gcc/common/config/aarch64/aarch64-common.cc @@ -146,6 +146,10 @@ aarch64_handle_option (struct gcc_options *opts, opts->x_flag_aarch64_max_vectorization = val; return true; + case OPT_msimd_memops: + opts->x_aarch64_flag_simd_memops = val; + return true; + default: return true; } diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index d30c9c75e42..19e6973a5e3 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -19906,6 +19906,8 @@ static const struct aarch64_attribute_info aarch64_attributes[] = OPT_moutline_atomics}, { "max-vectorization", aarch64_attr_bool, false, NULL, OPT_mmax_vectorization}, + { "simd-memops", aarch64_attr_bool, true, NULL, + OPT_msimd_memops}, { NULL, aarch64_attr_custom, false, NULL, OPT____ } }; @@ -27788,7 +27790,7 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove) unsigned HOST_WIDE_INT size = UINTVAL (operands[2]); /* Set inline limits for memmove/memcpy. MOPS has a separate threshold. */ - unsigned max_copy_size = TARGET_SIMD ? 256 : 128; + unsigned max_copy_size = TARGET_SIMD_MEMOPS ? 256 : 128; unsigned mops_threshold = is_memmove ? 
aarch64_mops_memmove_size_threshold : aarch64_mops_memcpy_size_threshold; @@ -27805,7 +27807,7 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove) ??? Although it would be possible to use LDP/STP Qn in streaming mode (so using TARGET_BASE_SIMD instead of TARGET_SIMD), it isn't clear whether that would improve performance. */ - bool use_qregs = size > 24 && TARGET_SIMD; + bool use_qregs = size > 24 && TARGET_SIMD_MEMOPS; base = copy_to_mode_reg (Pmode, XEXP (dst, 0)); dst = adjust_automodify_address (dst, VOIDmode, base, 0); @@ -27905,7 +27907,7 @@ aarch64_expand_setmem (rtx *operands) machine_mode mode = BLKmode, next_mode; /* Variable-sized or strict-align memset may use the MOPS expansion. */ - if (!CONST_INT_P (operands[1]) || !TARGET_SIMD + if (!CONST_INT_P (operands[1]) || !TARGET_SIMD_MEMOPS || (STRICT_ALIGNMENT && align < 16)) return aarch64_expand_setmem_mops (operands); diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 096c853af7f..fc6fd6bf869 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -121,6 +121,13 @@ of LSE instructions. */ #define TARGET_OUTLINE_ATOMICS (aarch64_flag_outline_atomics) +#ifndef AARCH64_SIMD_MEMOPS_DEFAULT +#define AARCH64_SIMD_MEMOPS_DEFAULT 1 +#endif + +/* Allow use of SIMD registers for memory copy and set expansions */ +#define TARGET_SIMD_MEMOPS (TARGET_SIMD && aarch64_flag_simd_memops) + /* Align global data as an optimization. */ #define DATA_ALIGNMENT(EXP, ALIGN) aarch64_data_alignment (EXP, ALIGN) diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index 9ca753e6a88..1d77d2048f2 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -352,6 +352,10 @@ moutline-atomics Target Var(aarch64_flag_outline_atomics) Init(2) Save Generate local calls to out-of-line atomic operations. 
+msimd-memops +Target Var(aarch64_flag_simd_memops) Init(AARCH64_SIMD_MEMOPS_DEFAULT) Save +Allow use of SIMD registers in memory set/copy expansions. + -param=aarch64-vect-compare-costs= Target Joined UInteger Var(aarch64_vect_compare_costs) Init(1) IntegerRange(0, 1) Param When vectorizing, consider using multiple different approaches and use diff --git a/gcc/config/aarch64/aarch64.opt.urls b/gcc/config/aarch64/aarch64.opt.urls index 7ec14a94381..709fc86a6c1 100644 --- a/gcc/config/aarch64/aarch64.opt.urls +++ b/gcc/config/aarch64/aarch64.opt.urls @@ -92,6 +92,9 @@ UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-reg) mstack-protector-guard-offset= UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-offset) +msimd-memops +UrlSuffix(gcc/AArch64-Options.html#index-msimd-memops) + Wexperimental-fmv-target UrlSuffix(gcc/AArch64-Options.html#index-Wexperimental-fmv-target) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 00468a72ada..4d518c28049 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -835,7 +835,7 @@ Objective-C and Objective-C++ Dialects}. -moverride=@var{string} -mverbose-cost-dump -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg} -mstack-protector-guard-offset=@var{offset} -mtrack-speculation --moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion +-moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion -msimd-memops -Wexperimental-fmv-target} @emph{Adapteva Epiphany Options} (@ref{Adapteva Epiphany Options}) @@ -22182,6 +22182,14 @@ For best performance it is highly recommended to use @option{-mcpu} or @option{-mtune} instead. This parameter should only be used for code exploration. +@item -msimd-memops +@itemx -mno-simd-memops +Enable or disable use of Advanced SIMD registers when expanding memory +copy and memory set operations. Use of these registers can improve +performance and reduce instruction count for these operations. 
This +option is ignored unless Advanced SIMD registers are available. +This option is on by default. + @opindex march @item -march=@var{name} Specify the name of the target architecture and, optionally, one or -- 2.49.0