The x86 backend already has the tuning flag

/* X86_TUNE_SPLIT_LONG_MOVES: Avoid instructions moving immediates
   directly to memory.  */
DEF_TUNE (X86_TUNE_SPLIT_LONG_MOVES, "split_long_moves", m_PPRO)

to avoid long immediate store instructions, like

c7 02 00 00 00 00    movl   $0x0,(%rdx)
c7 02 ff ff ff ff    movl   $0xffffffff,(%rdx)

Add TARGET_USE_AND0_ORM1_STORE, which is true for TARGET_SPLIT_LONG_MOVES
or -Oz, and enable *mov<mode>_(and|or) for it, so that the shorter

83 22 00              andl   $0x0,(%rdx)
83 0a ff              orl    $0xffffffff,(%rdx)

are generated for TARGET_SPLIT_LONG_MOVES as well as for -Oz.

gcc/

PR target/120734
* config/i386/i386.h (TARGET_USE_AND0_ORM1_STORE): New.
* config/i386/i386.md (*mov<mode>_and): Replace not -Oz split
condition with !TARGET_USE_AND0_ORM1_STORE.
(*mov<mode>_or): Likewise.
(peephole2): Transform "mov $0,mem" to "*mov<mode>_and" and
"mov $-1,mem" to "*mov<mode>_or" for TARGET_USE_AND0_ORM1_STORE.

gcc/testsuite/

PR target/120734
* gcc.target/i386/pr120734a.c: New test.
* gcc.target/i386/pr120734b.c: Likewise.
* gcc.target/i386/pr120734c.c: Likewise.

OK for master?

Thanks.

-- 
H.J.
From 1e3c5540a7c57db91d8d65a98aa9b378b506b62c Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Sat, 21 Jun 2025 09:10:07 +0800
Subject: [PATCH] x86: Enable *mov<mode>_(and|or) for TARGET_SPLIT_LONG_MOVES

The x86 backend already has the tuning flag

/* X86_TUNE_SPLIT_LONG_MOVES: Avoid instructions moving immediates
   directly to memory.  */
DEF_TUNE (X86_TUNE_SPLIT_LONG_MOVES, "split_long_moves", m_PPRO)

to avoid long immediate store instructions, like

c7 02 00 00 00 00    	movl   $0x0,(%rdx)
c7 02 ff ff ff ff    	movl   $0xffffffff,(%rdx)

Add TARGET_USE_AND0_ORM1_STORE, which is true for TARGET_SPLIT_LONG_MOVES
or -Oz, and enable *mov<mode>_(and|or) for it, so that the shorter

83 22 00             	andl   $0x0,(%rdx)
83 0a ff             	orl    $0xffffffff,(%rdx)

are generated for TARGET_SPLIT_LONG_MOVES as well as for -Oz.

gcc/

	PR target/120734
	* config/i386/i386.h (TARGET_USE_AND0_ORM1_STORE): New.
	* config/i386/i386.md (*mov<mode>_and): Replace not -Oz split
	condition with !TARGET_USE_AND0_ORM1_STORE.
	(*mov<mode>_or): Likewise.
	(peephole2): Transform "mov $0,mem" to "*mov<mode>_and" and
	"mov $-1,mem" to "*mov<mode>_or" for TARGET_USE_AND0_ORM1_STORE.

gcc/testsuite/

	PR target/120734
	* gcc.target/i386/pr120734a.c: New test.
	* gcc.target/i386/pr120734b.c: Likewise.
	* gcc.target/i386/pr120734c.c: Likewise.

Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
 gcc/config/i386/i386.h                    |  6 +++++
 gcc/config/i386/i386.md                   | 19 +++++++-------
 gcc/testsuite/gcc.target/i386/pr120734a.c | 32 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr120734b.c | 10 +++++++
 gcc/testsuite/gcc.target/i386/pr120734c.c | 10 +++++++
 5 files changed, 68 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120734a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120734b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120734c.c

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 7c16eac7700..f3e30932526 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -494,6 +494,12 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_SSE_REDUCTION_PREFER_PSHUF \
 	ix86_tune_features[X86_TUNE_SSE_REDUCTION_PREFER_PSHUF]
 
+/* Generate the shorter "and $0,mem" and "or $-1,mem" encodings, instead
+   of "mov $0,mem" and "mov $-1,mem", for TARGET_SPLIT_LONG_MOVES or
+   -Oz.  */
+#define TARGET_USE_AND0_ORM1_STORE \
+  (TARGET_SPLIT_LONG_MOVES \
+   || (optimize_insn_for_size_p () && optimize_size > 1))
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 423ef48e518..7b3857f972c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2438,30 +2438,30 @@ (define_insn "*mov<mode>_xor"
    (set_attr "mode" "SI")
    (set_attr "length_immediate" "0")])
 
-;; Generate shorter "and $0,mem" for -Oz.  Split it to "mov $0,mem"
-;; otherwise.
+;; Generate shorter "and $0,mem" for TARGET_USE_AND0_ORM1_STORE.  Split
+;; it to "mov $0,mem" otherwise.
 (define_insn_and_split "*mov<mode>_and"
   [(set (match_operand:SWI248 0 "memory_operand" "=m")
 	(match_operand:SWI248 1 "const0_operand"))
    (clobber (reg:CC FLAGS_REG))]
   "reload_completed"
   "and{<imodesuffix>}\t{%1, %0|%0, %1}"
-  "&& !(optimize_insn_for_size_p () && optimize_size > 1)"
+  "&& !TARGET_USE_AND0_ORM1_STORE"
   [(set (match_dup 0) (match_dup 1))]
   ""
   [(set_attr "type" "alu1")
    (set_attr "mode" "<MODE>")
    (set_attr "length_immediate" "1")])
 
-;; Generate shorter "or $-1,mem" for -Oz.  Split it to "mov $-1,mem"
-;; otherwise.
+;; Generate shorter "or $-1,mem" for TARGET_USE_AND0_ORM1_STORE.  Split
+;; it to "mov $-1,mem" otherwise.
 (define_insn_and_split "*mov<mode>_or"
   [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
 	(match_operand:SWI248 1 "constm1_operand"))
    (clobber (reg:CC FLAGS_REG))]
   "reload_completed"
   "or{<imodesuffix>}\t{%1, %0|%0, %1}"
-  "&& !(optimize_insn_for_size_p () && optimize_size > 1)"
+  "&& !TARGET_USE_AND0_ORM1_STORE"
   [(set (match_dup 0) (match_dup 1))]
   ""
   [(set_attr "type" "alu1")
@@ -2984,13 +2984,14 @@ (define_peephole2
 			     gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
 })
 
-;; With -Oz, transform mov $0,mem to the shorter and $0,mem.
-;; Likewise, transform mov $-1,mem to the shorter or $-1,mem.
+;; With TARGET_USE_AND0_ORM1_STORE, transform "mov $0,mem" to the
+;; shorter "and $0,mem".  Likewise, transform "mov $-1,mem" to the
+;; shorter "or $-1,mem".
 (define_peephole2
   [(set (match_operand:SWI248 0 "memory_operand")
 	(match_operand:SWI248 1 "const_int_operand"))]
   "(operands[1] == const0_rtx || operands[1] == constm1_rtx)
-   && optimize_insn_for_size_p () && optimize_size > 1
+   && TARGET_USE_AND0_ORM1_STORE
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (match_dup 1))
               (clobber (reg:CC FLAGS_REG))])])
diff --git a/gcc/testsuite/gcc.target/i386/pr120734a.c b/gcc/testsuite/gcc.target/i386/pr120734a.c
new file mode 100644
index 00000000000..4dd2f4ded5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120734a.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune-ctrl=split_long_moves,^lcp_stall" } */
+/* { dg-final { scan-assembler-not "mov\[wlq\]\[\t \]+\\\$0, " } } */
+/* { dg-final { scan-assembler-not "mov\[wlq\]\[\t \]+\\\$-1, " } } */
+/* { dg-final { scan-assembler-times "and(?:l|w|q)\[\\t ]+\\\$0, " 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "or(?:l|w|q)\[\\t ]+\\\$-1, " 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "and(?:l|w)\[\\t ]+\\\$0, " 2 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "or(?:l|w)\[\\t ]+\\\$-1, " 2 { target ia32 } } } */
+
+extern short s;
+extern int i;
+extern long long int ll;
+
+void
+zero (void)
+{
+  s = 0;
+  i = 0;
+#ifdef __x86_64__
+  ll = 0;
+#endif
+}
+
+void
+m1 (void)
+{
+  s = -1;
+  i = -1;
+#ifdef __x86_64__
+  ll = -1;
+#endif
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr120734b.c b/gcc/testsuite/gcc.target/i386/pr120734b.c
new file mode 100644
index 00000000000..b261af52925
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120734b.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune-ctrl=^split_long_moves,^lcp_stall" } */
+/* { dg-final { scan-assembler-not "and\[wlq\]\[\t \]+\\\$0, " } } */
+/* { dg-final { scan-assembler-not "or\[wlq\]\[\t \]+\\\$-1, " } } */
+/* { dg-final { scan-assembler-times "mov(?:w|l|q)\[\\t \]+\\\$0, " 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "mov(?:w|l|q)\[\\t \]+\\\$-1, " 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "mov(?:w|l)\[\\t \]+\\\$0, " 2 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "mov(?:w|l)\[\\t \]+\\\$-1, " 2 { target ia32 } } } */
+
+#include "pr120734a.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr120734c.c b/gcc/testsuite/gcc.target/i386/pr120734c.c
new file mode 100644
index 00000000000..e29d3df7fe6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120734c.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-Oz -mtune-ctrl=^split_long_moves,^lcp_stall" } */
+/* { dg-final { scan-assembler-not "mov\[wlq\]\[\t \]+\\\$0, " } } */
+/* { dg-final { scan-assembler-not "mov\[wlq\]\[\t \]+\\\$-1, " } } */
+/* { dg-final { scan-assembler-times "and(?:l|w|q)\[\\t ]+\\\$0, " 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "or(?:l|w|q)\[\\t ]+\\\$-1, " 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "and(?:l|w)\[\\t ]+\\\$0, " 2 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "or(?:l|w)\[\\t ]+\\\$-1, " 2 { target ia32 } } } */
+
+#include "pr120734a.c"
-- 
2.49.0

Reply via email to