Don't use vmovdqu16/vmovdqu8 with non-EVEX registers even if AVX512BW is
available.  Fall back to vmovdqu (or vmovups) for such registers instead.

gcc/

PR target/120728
* config/i386/i386.cc (ix86_get_ssemov): Use vmovdqu16/vmovdqu8
only with EVEX registers.

gcc/testsuite/

PR target/120728
* gcc.target/i386/pr120728.c: New test.


-- 
H.J.
From fb8db1e46aa4318f8c29853d97e77353dcab1e1c Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Fri, 20 Jun 2025 16:07:18 +0800
Subject: [PATCH] x86: Don't use vmovdqu16/vmovdqu8 with non-EVEX registers

Don't use vmovdqu16/vmovdqu8 with non-EVEX registers even if AVX512BW is
available.  Fall back to vmovdqu (or vmovups) for such registers instead.

gcc/

	PR target/120728
	* config/i386/i386.cc (ix86_get_ssemov): Use vmovdqu16/vmovdqu8
	only with EVEX registers.

gcc/testsuite/

	PR target/120728
	* gcc.target/i386/pr120728.c: New test.

Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
 gcc/config/i386/i386.cc                  |  8 +++----
 gcc/testsuite/gcc.target/i386/pr120728.c | 27 ++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120728.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 77853297a2f..c0284fbdf4e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5703,7 +5703,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 		      : "%vmovaps");
 	  else
 	    opcode = (misaligned_p
-		      ? (TARGET_AVX512BW
+		      ? (TARGET_AVX512BW && evex_reg_p
 			 ? "vmovdqu16"
 			 : "%vmovdqu")
 		      : "%vmovdqa");
@@ -5745,7 +5745,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 		      : "%vmovaps");
 	  else
 	    opcode = (misaligned_p
-		      ? (TARGET_AVX512BW
+		      ? (TARGET_AVX512BW && evex_reg_p
 			 ? "vmovdqu8"
 			 : "%vmovdqu")
 		      : "%vmovdqa");
@@ -5759,13 +5759,13 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 		      : "vmovdqa64");
 	  else if (egpr_p)
 	    opcode = (misaligned_p
-		      ? (TARGET_AVX512BW
+		      ? (TARGET_AVX512BW && evex_reg_p
 			 ? "vmovdqu16"
 			 : "%vmovups")
 		      : "%vmovaps");
 	  else
 	    opcode = (misaligned_p
-		      ? (TARGET_AVX512BW
+		      ? (TARGET_AVX512BW && evex_reg_p
 			 ? "vmovdqu16"
 			 : "%vmovdqu")
 		      : "%vmovdqa");
diff --git a/gcc/testsuite/gcc.target/i386/pr120728.c b/gcc/testsuite/gcc.target/i386/pr120728.c
new file mode 100644
index 00000000000..93d2cd07e2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120728.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4" } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+, " 3 } } */
+/* { dg-final { scan-assembler-not "vmovdqu8" } } */
+/* { dg-final { scan-assembler-not "vmovdqu16" } } */
+
+typedef char __v32qi __attribute__ ((__vector_size__ (32)));
+typedef char __v32qi_u __attribute__ ((__vector_size__ (32),
+				       __aligned__ (1)));
+typedef short __v16hi __attribute__ ((__vector_size__ (32)));
+typedef short __v16hi_u __attribute__ ((__vector_size__ (32),
+					   __aligned__ (1)));
+typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32)));
+typedef _Float16 __v16hf_u __attribute__ ((__vector_size__ (32),
+					   __aligned__ (1)));
+
+extern __v32qi_u v1;
+extern __v16hi_u v2;
+extern __v16hf_u v3;
+
+void
+foo (__v32qi x1, __v16hi x2, __v16hf x3)
+{
+  v1 = x1;
+  v2 = x2;
+  v3 = x3;
+}
-- 
2.49.0

Reply via email to