https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109874
Bug ID: 109874
Summary: [SH] GCC 13's -Os code is 50% bigger than GCC 4's
Product: gcc
Version: 13.1.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: paul at crapouillou dot net
Target Milestone: ---
Using the following C code snippet:
------
/* The four globals written by SetupCartCHRMapping(), one per shift amount. */
unsigned int CHRmask1,CHRmask2,CHRmask4,CHRmask8;
/*
 * Reproducer: stores (size >> k) - 1 into CHRmask1/2/4/8 for k = 10, 11, 12, 13.
 *
 * The two preprocessor branches compute the same four values; they differ only
 * in whether every shift starts from the original `size` (first branch) or
 * continues from the previously shifted value (second branch).
 * The statement form is deliberately left exactly as reported, because how
 * each form is compiled is the subject of this report.
 */
void SetupCartCHRMapping(unsigned int size)
{
#if 0
/* Variant 1 (disabled as written): four independent shifts of the original size. */
CHRmask1 = (size >> 10) - 1;
CHRmask2 = (size >> 11) - 1;
CHRmask4 = (size >> 12) - 1;
CHRmask8 = (size >> 13) - 1;
#else
/* Variant 2 (enabled as written): one running shift, narrowed step by step. */
/* size = original >> 10 */
size >>= 10;
CHRmask1 = size - 1;
/* size = original >> 11 */
size >>= 1;
CHRmask2 = size - 1;
/* size = original >> 12 */
size >>= 1;
CHRmask4 = size - 1;
/* size = original >> 13 */
size >>= 1;
CHRmask8 = size - 1;
#endif
}
------
Compiling with -Os, GCC 13.1 generates exactly the same code for both
variants, as it correctly detects that they are functionally the same:
------
! GCC 13.1 -Os output for SetupCartCHRMapping (SH, GAS syntax).
! In:  r4 = size.
! Out: CHRmask1/2/4/8 = (size >> 10/11/12/13) - 1.
! The globals are addressed GOT-relative: r12 is set up as the base register
! and every store goes through @(r0,r12) with an @GOTOFF constant in r0.
! Each of the four values is recomputed from the unshifted size in r4.
! shlr8 / shlr2 / shlr are logical right shifts by 8 / 2 / 1 bits.
_SetupCartCHRMapping:
! Save r12, then build the base: r12 = address of .L3 + word stored at .L3.
mov.l r12,@-r15
mova .L3,r0
mov.l .L3,r12
! r1 = size >> 10 (8 + 2); the base computation is interleaved with it.
mov r4,r1
shlr8 r1
add r0,r12
mov.l .L4,r0
shlr2 r1
! CHRmask1 = (size >> 10) - 1
add #-1,r1
mov.l r1,@(r0,r12)
! r1 = size >> 11 (8 + 1 + 2), starting again from the original size.
mov r4,r1
shlr8 r1
mov.l .L5,r0
shlr r1
shlr2 r1
! CHRmask2 = (size >> 11) - 1
add #-1,r1
mov.l r1,@(r0,r12)
! r1 = size >> 12 (8 + 2 + 2), starting again from the original size.
mov r4,r1
shlr8 r1
mov.l .L6,r0
shlr2 r1
shlr2 r1
! From here the size >> 13 computation (8 + 2 + 1 + 2, done in place in r4)
! is interleaved with finishing and storing CHRmask4.
shlr8 r4
add #-1,r1
shlr2 r4
! CHRmask4 = (size >> 12) - 1
mov.l r1,@(r0,r12)
shlr r4
mov.l .L7,r0
shlr2 r4
! CHRmask8 = (size >> 13) - 1
add #-1,r4
mov.l r4,@(r0,r12)
! The instruction after rts sits in its delay slot: r12 is restored before
! control returns to the caller.
rts
mov.l @r15+,r12
! Literal pool: the GOT reference used to build r12, then one @GOTOFF offset
! per global.
.L3:
.long _GLOBAL_OFFSET_TABLE_
.L4:
.long _CHRmask1@GOTOFF
.L5:
.long _CHRmask2@GOTOFF
.L6:
.long _CHRmask4@GOTOFF
.L7:
.long _CHRmask8@GOTOFF
! Storage for the four globals, 4 zero-initialised bytes each.
_CHRmask8:
.zero 4
_CHRmask4:
.zero 4
_CHRmask2:
.zero 4
_CHRmask1:
.zero 4
------
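The code part (excluding labels and data fields) is 33 instructions.
Note that this listing addresses the globals GOT-relative
(_GLOBAL_OFFSET_TABLE_ / @GOTOFF), which suggests it was built as
position-independent code, whereas the GCC 4.9.4 listing below uses absolute
addresses. Five of the 33 instructions (saving/restoring r12 and computing the
GOT base) exist only for that, so the comparison is not strictly like-for-like.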
GCC 4.9.4 does not detect that the two versions of the code are equivalent, and
generates different machine code for them. The second version produces the
smallest code, at only 21 instructions:
------
! GCC 4.9.4 output for the second (running-shift) variant of
! SetupCartCHRMapping (SH, GAS syntax).
! In:  r4 = size.
! Out: CHRmask1/2/4/8 = (size >> 10/11/12/13) - 1.
! The globals are addressed through absolute addresses loaded from the
! literal pool (.L2-.L5). r4 carries the running shifted value, so each step
! adds only the extra shift it needs.
! shlr8 / shlr2 / shlr are logical right shifts by 8 / 2 / 1 bits.
_SetupCartCHRMapping:
! r4 = size >> 10 (8 + 2)
shlr8 r4
shlr2 r4
! CHRmask1 = r4 - 1, computed in a copy (r2) so r4 keeps size >> 10.
mov.l .L2,r1
mov r4,r2
add #-1,r2
mov.l r2,@r1
! CHRmask2 = (r4 >> 1) - 1, computed in a copy (r1); r4 is still size >> 10.
mov r4,r1
mov.l .L3,r2
shlr r1
add #-1,r1
mov.l r1,@r2
! r4 = size >> 12 (one 2-bit shift from size >> 10); CHRmask4 = r4 - 1 via r2.
shlr2 r4
mov.l .L4,r1
mov r4,r2
add #-1,r2
mov.l r2,@r1
! r4 = size >> 13; CHRmask8 = r4 - 1.
shlr r4
mov.l .L5,r1
add #-1,r4
! The final store sits in the rts delay slot, so it executes before control
! returns to the caller.
rts
mov.l r4,@r1
! Literal pool: absolute addresses of the four globals.
.L2:
.long _CHRmask1
.L3:
.long _CHRmask2
.L4:
.long _CHRmask4
.L5:
.long _CHRmask8
------
So GCC 13.1 at -Os generates code that is more than 50% bigger (33 vs. 21
instructions) than what GCC 4 would generate for a functionally equivalent
algorithm.