[Bug tree-optimization/114318] New: Missing Optimization after multiple function calls

2024-03-12 Thread carnet at student dot ethz.ch via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114318

Bug ID: 114318
   Summary: Missing Optimization after multiple function calls
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: carnet at student dot ethz.ch
  Target Milestone: ---

GCC -O3 does not inline the function if there are more than two calls. This
also only happens in the main function. Changing the name of the main function
and/or reducing the number of function calls enables the optimization.

https://godbolt.org/z/KM71WrYac

Source
int a, c, d;
int *b, *e;
static int *f() {
  int *g = &d;
  int **h = &b;
  *g = 1;
  *h = &c;
  if (*b)
for (; d; d = d + 8)
  ;
  return 
}
int main() {
  f();
  f();
  f();
}

x86 -O3 Assembly:
f.part.0:
movld(%rip), %eax
testl   %eax, %eax
je  .L1
movl$0, d(%rip)
.L1:
ret
main:
movl$1, d(%rip)
movlc(%rip), %ecx
testl   %ecx, %ecx
je  .L5
xorl%eax, %eax
callf.part.0
.L5:
movl$1, d(%rip)
movlc(%rip), %edx
testl   %edx, %edx
je  .L6
xorl%eax, %eax
callf.part.0
.L6:
movl$1, d(%rip)
movlc(%rip), %eax
movq$c, b(%rip)
testl   %eax, %eax
je  .L7
xorl%eax, %eax
callf.part.0
.L7:
xorl%eax, %eax
ret
e:
.zero   8
b:
.zero   8
d:
.zero   4
c:
.zero   4
a:
.zero   4

[Bug tree-optimization/114317] New: Missing optimization for multiple condition statements

2024-03-12 Thread carnet at student dot ethz.ch via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114317

Bug ID: 114317
   Summary: Missing optimization for multiple condition statements
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: carnet at student dot ethz.ch
  Target Milestone: ---

GCC -O3 is not able to optimize the [b && (g = 0)] statement even though g is
static and 0. So the statement does not change the value of g. This further
prevents the optimization of the later statements.

https://godbolt.org/z/r7P7qajYn

Source:
int b;
int *e = &b;
static int g = 0;

int main() {
b && (g = 0);
if (3 - g){
*e = b;
}
if (253 - (9 | g)){
*e = b;
}
return *e;
}

x86 -O3 Assembly:
main:
movlb(%rip), %eax
testl   %eax, %eax
je  .L2
movqe(%rip), %rdx
xorl%edi, %edi
movl%edi, g(%rip)
movl%eax, (%rdx)
movlb(%rip), %eax
.L3:
movl%eax, (%rdx)
ret
.L2:
movlg(%rip), %ecx
movqe(%rip), %rdx
cmpl$3, %ecx
je  .L3
xorl%esi, %esi
orl $9, %ecx
movl%esi, (%rdx)
cmpl$253, %ecx
jne .L8
ret
.L8:
movlb(%rip), %eax
jmp .L3
e:
.quad   b
b:
.zero   4

[Bug tree-optimization/114162] New: Missing Optimization: Loop is vectorized instead of removed

2024-02-29 Thread carnet at student dot ethz.ch via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114162

Bug ID: 114162
   Summary: Missing Optimization: Loop is vectorized instead of
removed
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: carnet at student dot ethz.ch
  Target Milestone: ---

This simple for-loop is compiled to complex vectorized code instead of
being optimized away, as clang is able to do.

https://godbolt.org/z/c3fjWbWEz

Source:
unsigned short a;
int main() {
int counter = 0;
// n and a are multiples of 4
int n = 4;
a *= n;
for (; a > 0; a += n)
counter++;
return counter;
}
x86 -O3 Assembly:
main:
movzwl  a(%rip), %edx
salw$2, %dx
je  .L24
movl$-4, %ecx
subl%edx, %ecx
cmpw$52, %cx
jbe .L15
shrw$2, %cx
movd%edx, %xmm0
movl$8, %edi
xorl%eax, %eax
addl$1, %ecx
punpcklwd   %xmm0, %xmm0
movd%edi, %xmm3
movdqa  .LC0(%rip), %xmm1
movl%ecx, %esi
pshufd  $0, %xmm0, %xmm0
movl$2097184, %edi
pshufd  $0, %xmm3, %xmm3
shrw$3, %si
paddw   .LC1(%rip), %xmm0
movd%edi, %xmm2
movzwl  %si, %esi
pshufd  $0, %xmm2, %xmm2
.L5:
addl$1, %eax
movdqa  %xmm1, %xmm4
movdqa  %xmm0, %xmm5
paddd   %xmm3, %xmm1
paddw   %xmm2, %xmm0
cmpl%eax, %esi
jne .L5
movl%ecx, %esi
movl%ecx, %eax
andl$-8, %esi
andl$32760, %eax
andl$7, %ecx
leal(%rdx,%rsi,4), %edx
je  .L6
cmpw$-4, %dx
je  .L25
cmpw$-8, %dx
je  .L26
cmpw$-12, %dx
je  .L27
cmpw$-16, %dx
je  .L28
cmpw$-20, %dx
je  .L29
cmpw$-24, %dx
je  .L30
addl$7, %eax
addl$28, %edx
.L14:
movw%dx, a(%rip)
ret
.L26:
addl$2, %eax
xorl%edx, %edx
jmp .L14
.L24:
movw$0, a(%rip)
xorl%eax, %eax
ret
.L15:
xorl%eax, %eax
andb$4, %cl
jne .L4
movl$1, %eax
addw$4, %dx
je  .L14
.L4:
addl$2, %eax
addw$8, %dx
jne .L4
jmp .L14
.L6:
paddd   .LC5(%rip), %xmm4
paddw   .LC7(%rip), %xmm5
pshufd  $255, %xmm4, %xmm0
pextrw  $7, %xmm5, %edx
movd%xmm0, %eax
jmp .L14
.L25:
addl$1, %eax
xorl%edx, %edx
jmp .L14
.L27:
addl$3, %eax
xorl%edx, %edx
jmp .L14
.L28:
addl$4, %eax
xorl%edx, %edx
jmp .L14
.L29:
addl$5, %eax
xorl%edx, %edx
jmp .L14
.L30:
addl$6, %eax
xorl%edx, %edx
jmp .L14
a:
.zero   2
.LC0:
.long   0
.long   1
.long   2
.long   3
.LC1:
.value  0
.value  4
.value  8
.value  12
.value  16
.value  20
.value  24
.value  28
.LC5:
.long   5
.long   5
.long   5
.long   5
.LC7:
.long   262148
.long   262148
.long   262148
.long   262148

[Bug tree-optimization/114161] New: Missing Loop Optimization for Unexecuted Loop

2024-02-29 Thread carnet at student dot ethz.ch via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114161

Bug ID: 114161
   Summary: Missing Loop Optimization for Unexecuted Loop
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: carnet at student dot ethz.ch
  Target Milestone: ---

In the following code the for-loop is never executed because 'a' is initialized
to zero. GCC -O3 and -Os compile to a loop while clang is able to optimize it
away.

https://godbolt.org/z/G7r4585o5

Source:
static int a = 0;
static char b;
static short(c)(short d, short e) { return d - e; }
int foo() {
for (; a; a = c(a, 7))
;
return a;
}

x86 -O3 Assembly
foo:
movla(%rip), %eax
testl   %eax, %eax
je  .L2
leal-7(%rax), %edx
testb   $1, %al
je  .L3
movswl  %dx, %eax
testl   %eax, %eax
je  .L13
.L3:
subl$14, %eax
cwtl
testl   %eax, %eax
jne .L3
.L13:
movl$0, a(%rip)
.L2:
xorl%eax, %eax
ret

[Bug tree-optimization/113423] New: Missed Optimization: potential redundant load

2024-01-16 Thread carnet at student dot ethz.ch via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113423

Bug ID: 113423
   Summary: Missed Optimization: potential redundant load
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: carnet at student dot ethz.ch
  Target Milestone: ---

https://godbolt.org/z/jbbEPePhv

Source Code:
int a;
int *b;
int c = 0;

void foo() {
  if (b)
c = 3;
  a = c;
}


int bar(){
return c;
}

foo:
cmpq$0, b(%rip)
movlc(%rip), %eax <= eax is overwritten when the jump is
not taken, potentially redundant load
je  .L3
movl$3, c(%rip) 
movl$3, %eax
.L3:
movl%eax, a(%rip)
ret


%eax is written before the branch. If the branch is not taken, %eax is
overwritten, which makes the highlighted load potentially unnecessary.

[Bug tree-optimization/113422] New: Missed optimizations in the presence of pointer chains

2024-01-16 Thread carnet at student dot ethz.ch via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113422

Bug ID: 113422
   Summary: Missed optimizations in the presence of pointer chains
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: carnet at student dot ethz.ch
  Target Milestone: ---

https://godbolt.org/z/hdWKn4bjc

All three functions write to the same variable (b). Clang is able to optimize
this. GCC -O3 cannot always do this.
The assembly for foo writes through **d instead of directly writing 1 to b.
There is similar behavior for bar and baz. Clang optimizes the code to write
directly to b.
baz is fully optimized only if foo and bar are removed.

int b = 0;
static int *c = &b;
static int **d = &c;
static int ***e = &d;

void foo() {***e = 1;}
void bar() {**d = 1;}
void baz() {*c = 1;}

Assembly code:
foo:
movqd(%rip), %rax
movq(%rax), %rax
movl$1, (%rax)
ret