https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114169
Bug ID: 114169
Summary: missed optimization of repeated in-place load & store
Product: gcc
Version: 13.2.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: rtl-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: absoler at smail dot nju.edu.cn
Target Milestone: ---
Hi, here's the code:
```
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef signed short int int16_t;
typedef unsigned short int uint16_t;
typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef signed long int int64_t;
typedef unsigned long int uint64_t;
#include<stdlib.h>
#include<signal.h>
#include<stdio.h>
#include<string.h>
/* --- Struct/Union Declarations --- */
/* Aggregate mixing 32-bit and 16-bit integer fields (per the typedefs above).
 * The reproducer copies whole objects of this type between globals and
 * passes one by value. */
struct S0 {
int32_t f0; /* the field func_10 zeroes in g_4 via a pointer */
uint16_t f1;
int32_t f2;
int32_t f3;
uint16_t f4;
uint16_t f5;
int32_t f6;
int16_t f7;
};
/* --- GLOBAL VARIABLES --- */
/* g_2 is the source of the struct copy in func_1 and the destination of the
 * struct store in func_10. */
/* NOTE(review): some initializers (e.g. 0x9C92L for the int16_t field f7) do
 * not fit a 16-bit signed short, so they rely on implementation-defined
 * conversion; presumably left over from the test-case generator. */
struct S0 g_2 =
{1L,0xF8C7L,0x5C6EFF3DL,0x0369BD69L,65535UL,0x0CA9L,-9L,0x9C92L};
/* g_4 receives a copy of g_2 in func_1; func_10 then clears g_4.f0. */
struct S0 g_4 =
{-1L,0UL,0x314A5EA9L,0x4A90C6D2L,0xCD43L,65528UL,0x2E40C18AL,0x9C27L};
/* The three globals below are not referenced by func_1 or func_10. */
int16_t g_17 = 0xF85AL;
uint16_t g_18 = 0xA88AL;
const uint64_t g_19 = 0UL;
/* --- FORWARD DECLARATIONS --- */
/* Prototypes for the two functions defined below. The parameter is named
 * p_13 here and b in the definition of func_10. */
struct S0 func_1(void);
void func_10(struct S0 p_13);
/* Copies g_2 into g_4, then passes g_4 by value to func_10.
 * NOTE(review): declared to return struct S0 but contains no return
 * statement, and the local `a` is never used. Presumably artifacts of
 * test-case reduction; left unchanged so the reproducer still matches the
 * reported assembly. */
struct S0 func_1() {
int32_t a; /* unused */
g_4 = g_2; /* whole-struct copy */
func_10(g_4); /* the callee receives its own copy of g_4 */
}
/* Sets g_4.f0 to zero through a pointer, then stores the by-value argument b
 * into g_2 through a pointer. */
void func_10(struct S0 b) {
int32_t c = 0;
int32_t *d = &g_4.f0;
struct S0 *e = &g_2;
*d = c; /* g_4.f0 = 0; does not affect b, which was copied at the call */
/* g_2 = b. When called from func_1, b holds the value just copied out of
 * g_2, so this store writes back what g_2 already contains. */
*e = b;
}
```
When compiled with gcc-13.2.0 -O3, it generates the following code:
https://godbolt.org/z/4d9roGWTz
```
0000000000401630 <func_1>:
func_1():
/root/loadtest3/test/output2.c:49
401630: movdqa 0x2a58(%rip),%xmm0 # 404090 <g_2>
401638: mov 0x2a52(%rip),%eax # 404090 <g_2> # load
40163e: movdqu 0x2a56(%rip),%xmm1 # 40409c <g_2+0xc>
401646: movaps %xmm0,0x2a23(%rip) # 404070 <g_4>
func_10():
/root/loadtest3/test/output2.c:57
40164d: mov %eax,0x2a3d(%rip) # 404090 <g_2> # store
401653: movzwl 0x2a1a(%rip),%eax # 404074 <g_4+0x4>
func_1():
/root/loadtest3/test/output2.c:49
40165a: movups %xmm1,0x2a1b(%rip) # 40407c <g_4+0xc>
func_10():
/root/loadtest3/test/output2.c:57
401661: mov %ax,0x2a2c(%rip) # 404094 <g_2+0x4>
401668: mov 0x2a09(%rip),%rax # 404078 <g_4+0x8>
/root/loadtest3/test/output2.c:56
40166f: movl $0x0,0x29f7(%rip) # 404070 <g_4>
/root/loadtest3/test/output2.c:57
401679: mov %rax,0x2a18(%rip) # 404098 <g_2+0x8>
401680: mov 0x29f9(%rip),%rax # 404080 <g_4+0x10>
401687: mov %rax,0x2a12(%rip) # 4040a0 <g_2+0x10>
40168e: movzwl 0x29f3(%rip),%eax # 404088 <g_4+0x18>
401695: mov %ax,0x2a0c(%rip) # 4040a8 <g_2+0x18>
func_1():
/root/loadtest3/test/output2.c:51
40169c: mov %rdi,%rax
40169f: retq
```
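We can see that the load/store pair at addresses 0x401638 and 0x40164d is
unnecessary: it loads the first 4 bytes of g_2 (g_2.f0) into %eax and then
stores the same value back to the same location.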