From: "hongtao.liu" <hongtao....@intel.com> Align move_max with prefer_vector_width for SPR/GNR/DMR to avoid STLF issue. It's similar as previous commit.
commit 6ea25c041964bf63014fcf7bb68fb1f5a0a4e123 Author: liuhongt <hongtao....@intel.com> Date: Thu Aug 15 12:54:07 2024 +0800 Align ix86_{move_max,store_max} with vectorizer. When none of mprefer-vector-width, avx256_optimal/avx128_optimal, avx256_store_by_pieces/avx512_store_by_pieces is specified, GCC will set ix86_{move_max,store_max} as max available vector length except for AVX part. if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) opts->x_ix86_move_max = PVW_AVX512; else opts->x_ix86_move_max = PVW_AVX128; So for -mavx2, vectorizer will choose 256-bit for vectorization, but 128-bit is used for struct copy, there could be a potential STLF issue due to this "misalign". Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ready push to trunk. gcc/ChangeLog: * config/i386/x86-tune.def (X86_TUNE_AVX512_MOVE_BY_PIECES): (X86_TUNE_AVX512_STORE_BY_PIECES): gcc/testsuite/ChangeLog: * gcc.target/i386/pieces-memcpy-18.c: Use -mtune=znver5 instead of -mtune=sapphirerapids. * gcc.target/i386/pieces-memcpy-21.c: Ditto. * gcc.target/i386/pieces-memset-46.c: Ditto. * gcc.target/i386/pieces-memset-49.c: Ditto. --- gcc/config/i386/x86-tune.def | 6 ++---- gcc/testsuite/gcc.target/i386/pieces-memcpy-18.c | 2 +- gcc/testsuite/gcc.target/i386/pieces-memcpy-21.c | 2 +- gcc/testsuite/gcc.target/i386/pieces-memset-46.c | 2 +- gcc/testsuite/gcc.target/i386/pieces-memset-49.c | 2 +- 5 files changed, 6 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index a86cbad281c..8e642aa92a2 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -625,14 +625,12 @@ DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces", /* X86_TUNE_AVX512_MOVE_BY_PIECES: Optimize move_by_pieces with 512-bit AVX instructions. */ DEF_TUNE (X86_TUNE_AVX512_MOVE_BY_PIECES, "avx512_move_by_pieces", - m_SAPPHIRERAPIDS | m_GRANITERAPIDS | m_GRANITERAPIDS_D - | m_DIAMONDRAPIDS | m_ZNVER4 | m_ZNVER5) + m_ZNVER4 | m_ZNVER5) /* X86_TUNE_AVX512_STORE_BY_PIECES: Optimize store_by_pieces with 512-bit AVX instructions. */ DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces", - m_SAPPHIRERAPIDS | m_GRANITERAPIDS | m_GRANITERAPIDS_D - | m_DIAMONDRAPIDS | m_ZNVER4 | m_ZNVER5) + m_ZNVER4 | m_ZNVER5) /* X86_TUNE_AVX512_TWO_EPILOGUES: Use two vector epilogues for 512-bit vectorized loops. */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-18.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-18.c index b15a0db9ff0..b4995ac0598 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-18.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-18.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -march=sapphirerapids" } */ +/* { dg-options "-O2 -march=znver5" } */ extern char *dst, *src; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-21.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-21.c index ef439f20f74..804a2989d64 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-21.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-21.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mtune=sapphirerapids -march=x86-64 -mavx2" } */ +/* { dg-options "-O2 -mtune=znver5 -march=x86-64 -mavx2" } */ extern char *dst, *src; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-46.c b/gcc/testsuite/gcc.target/i386/pieces-memset-46.c index be1b054eed2..43d636ee3ff 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-46.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-46.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -march=sapphirerapids" } */ +/* { dg-options "-O2 -march=znver5" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-49.c b/gcc/testsuite/gcc.target/i386/pieces-memset-49.c index ad43f89a9bd..ca4933ac1d8 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-49.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-49.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mtune=sapphirerapids -march=x86-64 -mavx2" } */ +/* { dg-options "-O2 -mtune=znver5 -march=x86-64 -mavx2" } */ extern char *dst; -- 2.34.1