Don't use vmovdqu16/vmovdqu8 with non-EVEX registers even if AVX512BW is available.
gcc/

	PR target/120728
	* config/i386/i386.cc (ix86_get_ssemov): Use vmovdqu16/vmovdqu8
	only with EVEX registers.

gcc/testsuite/

	PR target/120728
	* gcc.target/i386/pr120728.c: New test.

-- 
H.J.
From fb8db1e46aa4318f8c29853d97e77353dcab1e1c Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Fri, 20 Jun 2025 16:07:18 +0800 Subject: [PATCH] x86: Don't use vmovdqu16/vmovdqu8 with non-EVEX registers Don't use vmovdqu16/vmovdqu8 with non-EVEX registers even if AVX512BW is available. gcc/ PR target/120728 * config/i386/i386.cc (ix86_get_ssemov): Use vmovdqu16/vmovdqu8 only with EVEX registers. gcc/testsuite/ PR target/120728 * gcc.target/i386/pr120728.c: New test. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> --- gcc/config/i386/i386.cc | 8 +++---- gcc/testsuite/gcc.target/i386/pr120728.c | 27 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr120728.c diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 77853297a2f..c0284fbdf4e 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -5703,7 +5703,7 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "%vmovaps"); else opcode = (misaligned_p - ? (TARGET_AVX512BW + ? (TARGET_AVX512BW && evex_reg_p ? "vmovdqu16" : "%vmovdqu") : "%vmovdqa"); @@ -5745,7 +5745,7 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "%vmovaps"); else opcode = (misaligned_p - ? (TARGET_AVX512BW + ? (TARGET_AVX512BW && evex_reg_p ? "vmovdqu8" : "%vmovdqu") : "%vmovdqa"); @@ -5759,13 +5759,13 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "vmovdqa64"); else if (egpr_p) opcode = (misaligned_p - ? (TARGET_AVX512BW + ? (TARGET_AVX512BW && evex_reg_p ? "vmovdqu16" : "%vmovups") : "%vmovaps"); else opcode = (misaligned_p - ? (TARGET_AVX512BW + ? (TARGET_AVX512BW && evex_reg_p ? 
"vmovdqu16" : "%vmovdqu") : "%vmovdqa"); diff --git a/gcc/testsuite/gcc.target/i386/pr120728.c b/gcc/testsuite/gcc.target/i386/pr120728.c new file mode 100644 index 00000000000..93d2cd07e2f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120728.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64-v4" } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+, " 3 } } */ +/* { dg-final { scan-assembler-not "vmovdqu8" } } */ +/* { dg-final { scan-assembler-not "vmovdqu16" } } */ + +typedef char __v32qi __attribute__ ((__vector_size__ (32))); +typedef char __v32qi_u __attribute__ ((__vector_size__ (32), + __aligned__ (1))); +typedef short __v16hi __attribute__ ((__vector_size__ (32))); +typedef short __v16hi_u __attribute__ ((__vector_size__ (32), + __aligned__ (1))); +typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32))); +typedef _Float16 __v16hf_u __attribute__ ((__vector_size__ (32), + __aligned__ (1))); + +extern __v32qi_u v1; +extern __v16hi_u v2; +extern __v16hf_u v3; + +void +foo (__v32qi x1, __v16hi x2, __v16hf x3) +{ + v1 = x1; + v2 = x2; + v3 = x3; +} -- 2.49.0