On Sun, May 25, 2025 at 8:12 AM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> On Sun, May 25, 2025 at 7:47 AM H.J. Lu <hjl.to...@gmail.com> wrote:
> >
> > commit ef26c151c14a87177d46fd3d725e7f82e040e89f
> > Author: Roger Sayle <ro...@nextmovesoftware.com>
> > Date:   Thu Dec 23 12:33:07 2021 +0000
> >
> >     x86: PR target/103773: Fix wrong-code with -Oz from pop to memory.
> >
> > transformed "mov $0,mem" to the shorter "and $0,mem" for -Oz.  But
> >
> > (define_insn "*mov<mode>_and"
> >   [(set (match_operand:SWI248 0 "memory_operand" "=m")
> >     (match_operand:SWI248 1 "const0_operand"))
> >    (clobber (reg:CC FLAGS_REG))]
> >   "reload_completed"
> >   "and{<imodesuffix>}\t{%1, %0|%0, %1}"
> >   [(set_attr "type" "alu1")
> >    (set_attr "mode" "<MODE>")
> >    (set_attr "length_immediate" "1")])
> >
> > isn't guarded for -Oz.  As a result, "and $0,mem" is generated without
> > -Oz.  Enable *mov<mode>_and only for -Oz.
> >
> > gcc/
> >
> > PR target/120427
> > * config/i386/i386.md (*mov<mode>_and): Enable only for -Oz.
> >
> > gcc/testsuite/
> >
> > PR target/120427
> > * gcc.target/i386/pr120427.c: New test.
> >
> > OK for master?
> >
>
> "mov $-1,mem" has the same issue.  Here is the updated patch to also
> enable "or $-1,mem" only for -Oz.
>
> OK for master?

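That doesn't work, since "*mov<mode>_or" was extended from the register
load pattern: guarding the whole pattern with -Oz would also disable
"or $-1,reg", which must stay available regardless of -Oz.  Here is the
v2 patch: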

1. Add "*mov<mode>_or_store" for "or $-1,mem".
2. Rename "*mov<mode>_or" to "*mov<mode>_or_load", replacing
nonimmediate_operand with register_operand.
3. Enable "*mov<mode>_and" and "*mov<mode>_or_store" only for -Oz.

Tested on x86-64.

-- 
H.J.
From be013c2d0bde068804fda3db6b05c89d7a26d54e Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Sun, 25 May 2025 07:40:29 +0800
Subject: [PATCH v2] x86: Enable *mov<mode>_(and|or_store) only for -Oz

commit ef26c151c14a87177d46fd3d725e7f82e040e89f
Author: Roger Sayle <ro...@nextmovesoftware.com>
Date:   Thu Dec 23 12:33:07 2021 +0000

    x86: PR target/103773: Fix wrong-code with -Oz from pop to memory.

added "*mov<mode>_and" and extended "*mov<mode>_or" to transform
"mov $0,mem" to the shorter "and $0,mem" and "mov $-1,mem" to the shorter
"or $-1,mem" for -Oz.  But the new pattern:

(define_insn "*mov<mode>_and"
  [(set (match_operand:SWI248 0 "memory_operand" "=m")
    (match_operand:SWI248 1 "const0_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "and{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "1")])

and the extended pattern:

(define_insn "*mov<mode>_or"
  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
    (match_operand:SWI248 1 "constm1_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "or{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "1")])

aren't restricted to -Oz.  As a result, "and $0,mem" and "or $-1,mem" are
generated even without -Oz.  This patch makes the following changes:

1. Add "*mov<mode>_or_store" for "or $-1,mem".
2. Rename "*mov<mode>_or" to "*mov<mode>_or_load", replacing
nonimmediate_operand with register_operand.
3. Enable "*mov<mode>_and" and "*mov<mode>_or_store" only for -Oz.

gcc/

	PR target/120427
	* config/i386/i386.md (*mov<mode>_and): Enable only for -Oz.
	(*mov<mode>_or_store): New.
	(*mov<mode>_or): Renamed to ...
	(*mov<mode>_or_load): This.  Replace nonimmediate_operand with
	register_operand.

gcc/testsuite/

	PR target/120427
	* gcc.target/i386/pr120427-1.c: New test.
	* gcc.target/i386/pr120427-2.c: Likewise.

Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
 gcc/config/i386/i386.md                    | 18 +++++++++++---
 gcc/testsuite/gcc.target/i386/pr120427-1.c | 28 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr120427-2.c | 28 ++++++++++++++++++++++
 3 files changed, 71 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-2.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b7a18d583da..e55dd27cfcf 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2442,14 +2442,26 @@ (define_insn "*mov<mode>_and"
   [(set (match_operand:SWI248 0 "memory_operand" "=m")
 	(match_operand:SWI248 1 "const0_operand"))
    (clobber (reg:CC FLAGS_REG))]
-  "reload_completed"
+  "reload_completed
+   && optimize_insn_for_size_p () && optimize_size > 1"
   "and{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
    (set_attr "mode" "<MODE>")
    (set_attr "length_immediate" "1")])
 
-(define_insn "*mov<mode>_or"
-  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
+(define_insn "*mov<mode>_or_store"
+  [(set (match_operand:SWI248 0 "memory_operand" "=m")
+	(match_operand:SWI248 1 "constm1_operand"))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && optimize_insn_for_size_p () && optimize_size > 1"
+  "or{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "<MODE>")
+   (set_attr "length_immediate" "1")])
+
+(define_insn "*mov<mode>_or_load"
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
 	(match_operand:SWI248 1 "constm1_operand"))
    (clobber (reg:CC FLAGS_REG))]
   "reload_completed"
diff --git a/gcc/testsuite/gcc.target/i386/pr120427-1.c b/gcc/testsuite/gcc.target/i386/pr120427-1.c
new file mode 100644
index 00000000000..7f1690e49b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120427-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=sapphirerapids" } */
+/* { dg-final { scan-assembler-not "and\[lq\]?\[\\t \]+\\\$0, \[0-9\]*\\(" } } */
+
+struct __pthread_mutex_s
+{
+  int __lock;
+  unsigned int __count;
+  int __owner;
+  unsigned int __nusers;
+  int __kind;
+  short __spins;
+  short __elision;
+  void *p[2];
+};
+typedef union
+{
+  struct __pthread_mutex_s __data;
+  char __size[40];
+  long int __align;
+} pthread_mutex_t;
+typedef struct { pthread_mutex_t mutex; } __rtld_lock_recursive_t;
+void
+foo (__rtld_lock_recursive_t *lock, int i)
+{
+  lock[i] = (__rtld_lock_recursive_t) {{ { 0, 0, 0, 0, 1,
+      0, 0, { ((void *)0) , ((void *)0) } } }};
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr120427-2.c b/gcc/testsuite/gcc.target/i386/pr120427-2.c
new file mode 100644
index 00000000000..a380c128ccb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120427-2.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=sapphirerapids" } */
+/* { dg-final { scan-assembler-not "or\[lq\]?\[\\t \]+\\\$-1, \[0-9\]*\\(" } } */
+
+struct __pthread_mutex_s
+{
+  int __lock;
+  unsigned int __count;
+  int __owner;
+  unsigned int __nusers;
+  int __kind;
+  short __spins;
+  short __elision;
+  void *p[2];
+};
+typedef union
+{
+  struct __pthread_mutex_s __data;
+  char __size[40];
+  long int __align;
+} pthread_mutex_t;
+typedef struct { pthread_mutex_t mutex; } __rtld_lock_recursive_t;
+void
+foo (__rtld_lock_recursive_t *lock, int i)
+{
+  lock[i] = (__rtld_lock_recursive_t) {{ { -1, -1, -1, -1, 1,
+      -1, -1, { ((void *)-1) , ((void *)-1) } } }};
+}
-- 
2.49.0

Reply via email to