On Sun, May 25, 2025 at 8:12 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > On Sun, May 25, 2025 at 7:47 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > commit ef26c151c14a87177d46fd3d725e7f82e040e89f > > Author: Roger Sayle <ro...@nextmovesoftware.com> > > Date: Thu Dec 23 12:33:07 2021 +0000 > > > > x86: PR target/103773: Fix wrong-code with -Oz from pop to memory. > > > > transformed "mov $0,mem" to the shorter and "$0,mem" for -Oz. But > > > > (define_insn "*mov<mode>_and" > > [(set (match_operand:SWI248 0 "memory_operand" "=m") > > (match_operand:SWI248 1 "const0_operand")) > > (clobber (reg:CC FLAGS_REG))] > > "reload_completed" > > "and{<imodesuffix>}\t{%1, %0|%0, %1}" > > [(set_attr "type" "alu1") > > (set_attr "mode" "<MODE>") > > (set_attr "length_immediate" "1")]) > > > > isn't guarded for -Oz. As a result, "and $0,mem" is generated without > > -Oz. Enable *mov<mode>_and only for -Oz. > > > > gcc/ > > > > PR target/120427 > > * config/i386/i386.md (*mov<mode>_and): Enable only for -Oz. > > > > gcc/testsuite/ > > > > PR target/120427 > > * gcc.target/i386/pr120427.c: New test. > > > > OK for master? > > > > "mov $-1,mem" has the same issue. Here is the updated patch to also > enable "or $-1,mem" only for -Oz. > > OK for master?
The previous patch doesn't work, since "*mov<mode>_or" was originally a register-load pattern that was later extended to also handle stores to memory. Here is the v2 patch: 1. Add "*mov<mode>_or_store" for "or $-1,mem". 2. Rename "*mov<mode>_or" to "*mov<mode>_or_load", replacing nonimmediate_operand with register_operand. 3. Enable "*mov<mode>_and" and "*mov<mode>_or_store" only for -Oz. Tested on x86-64. -- H.J.
From be013c2d0bde068804fda3db6b05c89d7a26d54e Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Sun, 25 May 2025 07:40:29 +0800 Subject: [PATCH v2] x86: Enable *mov<mode>_(and|or_store) only for -Oz commit ef26c151c14a87177d46fd3d725e7f82e040e89f Author: Roger Sayle <ro...@nextmovesoftware.com> Date: Thu Dec 23 12:33:07 2021 +0000 x86: PR target/103773: Fix wrong-code with -Oz from pop to memory. added "*mov<mode>_and" and extended "*mov<mode>_or" to transform "mov $0,mem" to the shorter "and $0,mem" and "mov $-1,mem" to the shorter "or $-1,mem" for -Oz. But the new pattern: (define_insn "*mov<mode>_and" [(set (match_operand:SWI248 0 "memory_operand" "=m") (match_operand:SWI248 1 "const0_operand")) (clobber (reg:CC FLAGS_REG))] "reload_completed" "and{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) and the extended pattern: (define_insn "*mov<mode>_or" [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm") (match_operand:SWI248 1 "constm1_operand")) (clobber (reg:CC FLAGS_REG))] "reload_completed" "or{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) aren't guarded for -Oz. As a result, "and $0,mem" and "or $-1,mem" are generated without -Oz. This patch: 1. Add "*mov<mode>_or_store" for "or $-1,mem". 2. Rename "*mov<mode>_or" to "*mov<mode>_or_load", replacing nonimmediate_operand with register_operand. 3. Enable "*mov<mode>_and" and "*mov<mode>_or_store" only for -Oz. gcc/ PR target/120427 * config/i386/i386.md (*mov<mode>_and): Enable only for -Oz. (*mov<mode>_or_store): New. (*mov<mode>_or): Renamed to ... (*mov<mode>_or_load): This. Replace nonimmediate_operand with register_operand. gcc/testsuite/ PR target/120427 * gcc.target/i386/pr120427-1.c: New test. * gcc.target/i386/pr120427-2.c: Likewise. Signed-off-by: H.J. 
Lu <hjl.to...@gmail.com> --- gcc/config/i386/i386.md | 18 +++++++++++--- gcc/testsuite/gcc.target/i386/pr120427-1.c | 28 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr120427-2.c | 28 ++++++++++++++++++++++ 3 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-2.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b7a18d583da..e55dd27cfcf 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2442,14 +2442,26 @@ (define_insn "*mov<mode>_and" [(set (match_operand:SWI248 0 "memory_operand" "=m") (match_operand:SWI248 1 "const0_operand")) (clobber (reg:CC FLAGS_REG))] - "reload_completed" + "reload_completed + && optimize_insn_for_size_p () && optimize_size > 1" "and{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) -(define_insn "*mov<mode>_or" - [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm") +(define_insn "*mov<mode>_or_store" + [(set (match_operand:SWI248 0 "memory_operand" "=m") + (match_operand:SWI248 1 "constm1_operand")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && optimize_insn_for_size_p () && optimize_size > 1" + "or{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "<MODE>") + (set_attr "length_immediate" "1")]) + +(define_insn "*mov<mode>_or_load" + [(set (match_operand:SWI248 0 "register_operand" "=r") (match_operand:SWI248 1 "constm1_operand")) (clobber (reg:CC FLAGS_REG))] "reload_completed" diff --git a/gcc/testsuite/gcc.target/i386/pr120427-1.c b/gcc/testsuite/gcc.target/i386/pr120427-1.c new file mode 100644 index 00000000000..7f1690e49b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120427-1.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=sapphirerapids" } */ +/* { dg-final { scan-assembler-not "and\[lq\]?\[\\t \]+\\\$0, \[0-9\]*\\(" } } 
*/ + +struct __pthread_mutex_s +{ + int __lock; + unsigned int __count; + int __owner; + unsigned int __nusers; + int __kind; + short __spins; + short __elision; + void *p[2]; +}; +typedef union +{ + struct __pthread_mutex_s __data; + char __size[40]; + long int __align; +} pthread_mutex_t; +typedef struct { pthread_mutex_t mutex; } __rtld_lock_recursive_t; +void +foo (__rtld_lock_recursive_t *lock, int i) +{ + lock[i] = (__rtld_lock_recursive_t) {{ { 0, 0, 0, 0, 1, + 0, 0, { ((void *)0) , ((void *)0) } } }}; +} diff --git a/gcc/testsuite/gcc.target/i386/pr120427-2.c b/gcc/testsuite/gcc.target/i386/pr120427-2.c new file mode 100644 index 00000000000..a380c128ccb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120427-2.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=sapphirerapids" } */ +/* { dg-final { scan-assembler-not "or\[lq\]?\[\\t \]+\\\$-1, \[0-9\]*\\(" } } */ + +struct __pthread_mutex_s +{ + int __lock; + unsigned int __count; + int __owner; + unsigned int __nusers; + int __kind; + short __spins; + short __elision; + void *p[2]; +}; +typedef union +{ + struct __pthread_mutex_s __data; + char __size[40]; + long int __align; +} pthread_mutex_t; +typedef struct { pthread_mutex_t mutex; } __rtld_lock_recursive_t; +void +foo (__rtld_lock_recursive_t *lock, int i) +{ + lock[i] = (__rtld_lock_recursive_t) {{ { -1, -1, -1, -1, 1, + -1, -1, { ((void *)-1) , ((void *)-1) } } }}; +} -- 2.49.0