https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121138

            Bug ID: 121138
           Summary: [16 Regression] missed optimization in the loop caused
                    by r16-303-g1a13684dfc7286
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hjl.tools at gmail dot com
                CC: rguenther at suse dot de
            Blocks: 120003
  Target Milestone: ---

For the following test case:

/* Global union defined in another translation unit.  Its two members,
   an int and a float, share the same storage; int_as_float() writes one
   and reads the other.  */
extern union {
  int i;
  float f;
} int_as_float_u;

/* External state used by render_result_from_bake_h():
   - render_result_from_bake_w is read as the outer-loop condition and as
     the inner-loop upper bound;
   - render_result_from_bake_h_seed_pass is tested to decide whether
     *render_result_from_bake_h_seed is written;
   - render_result_from_bake_h_primitive is the float buffer whose
     elements [1] and [2] are written;
   - render_result_from_bake_h_seed points to the float that is set to 0.  */
extern int render_result_from_bake_w;
extern int render_result_from_bake_h_seed_pass;
extern float *render_result_from_bake_h_primitive;
extern float *render_result_from_bake_h_seed;

/* Store I into the int member of the global union int_as_float_u and
   return the union's float member, i.e. reinterpret I's object
   representation as a float via union type punning.
   Side effect: overwrites int_as_float_u.
   NOTE(review): this presumes int and float have the same size on the
   target -- confirm for targets other than the x86-64 one shown here.  */
float
int_as_float(int i)
{
  int_as_float_u.i = i;
  return int_as_float_u.f;
}

/* Reduced test case for the missed loop optimization.

   While render_result_from_bake_w is non-zero:
     - advance TX up to render_result_from_bake_w, storing
       int_as_float(-1) into elements [2] and [1] of
       render_result_from_bake_h_primitive on every iteration;
     - if render_result_from_bake_h_seed_pass is non-zero, store 0 into
       *render_result_from_bake_h_seed.

   TX is the starting value of the inner-loop counter.

   No statement in this function assigns render_result_from_bake_w or
   render_result_from_bake_h_seed_pass; the code is kept exactly in this
   shape because the loop structure is what the report is about.  */
void
render_result_from_bake_h(int tx)
{
  while (render_result_from_bake_w) {
    /* The init clause is empty, so TX keeps its value across iterations of
       the outer loop.  */
    for (; tx < render_result_from_bake_w; tx++)
      /* Chained assignment: the same float value goes to both elements.  */
      render_result_from_bake_h_primitive[1] =
          render_result_from_bake_h_primitive[2] = int_as_float(-1);
    if (render_result_from_bake_h_seed_pass) {
      *render_result_from_bake_h_seed = 0;
    }
  }
}

GCC 16 generates:

[hjl@gnu-cfl-3 gcc]$ ./xgcc -B./ -S /tmp/x.c -O2 -march=x86-64-v3 
[hjl@gnu-cfl-3 gcc]$ cat x.s
        .file   "x.c"
        .text
        .p2align 4
        .globl  int_as_float
        .type   int_as_float, @function
int_as_float:
.LFB0:
        .cfi_startproc
        movl    %edi, int_as_float_u(%rip)
        vmovd   %edi, %xmm0
        ret
        .cfi_endproc
.LFE0:
        .size   int_as_float, .-int_as_float
        .p2align 4
        .globl  render_result_from_bake_h
        .type   render_result_from_bake_h, @function
render_result_from_bake_h:
.LFB1:
        .cfi_startproc
        movl    render_result_from_bake_w(%rip), %edx
        testl   %edx, %edx
        je      .L17
        movq    render_result_from_bake_h_primitive(%rip), %rcx
        movl    render_result_from_bake_h_seed_pass(%rip), %esi
        movq    render_result_from_bake_h_seed(%rip), %r8
        cmpl    %edx, %edi
        jge     .L18
        movq    $-1, %rax
        movl    %edx, %edi
        movq    %rax, 4(%rcx)
        testl   %esi, %esi
        je      .L10
.L7:
        movl    $0x00000000, (%r8)
        cmpl    %edi, %edx
        jle     .L7
        movq    %rax, 4(%rcx)
        movl    %edx, %edi
        jmp     .L7
.L18:
        movq    $-1, %rax
        testl   %esi, %esi
        jne     .L7
        .p2align 2
        .p2align 4
        .p2align 3
.L10:
        cmpl    %edi, %edx
        jle     .L10
        movq    %rax, 4(%rcx)
        movl    %edx, %edi
        jmp     .L10
.L17:
        ret
        .cfi_endproc
.LFE1:
        .size   render_result_from_bake_h, .-render_result_from_bake_h
        .ident  "GCC: (GNU) 16.0.0 20250717 (experimental)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-cfl-3 gcc]$ 

If r16-303-g1a13684dfc7286 is reverted, GCC generates:

[hjl@gnu-cfl-3 gcc]$ ./xgcc -B./ -S /tmp/x.c -O2 -march=x86-64-v3 
[hjl@gnu-cfl-3 gcc]$ cat x.s
        .file   "x.c"
        .text
        .p2align 4
        .globl  int_as_float
        .type   int_as_float, @function
int_as_float:
.LFB0:
        .cfi_startproc
        movl    %edi, int_as_float_u(%rip)
        vmovd   %edi, %xmm0
        ret
        .cfi_endproc
.LFE0:
        .size   int_as_float, .-int_as_float
        .p2align 4
        .globl  render_result_from_bake_h
        .type   render_result_from_bake_h, @function
render_result_from_bake_h:
.LFB1:
        .cfi_startproc
        movl    render_result_from_bake_w(%rip), %eax
        testl   %eax, %eax
        je      .L11
        movq    render_result_from_bake_h_primitive(%rip), %rcx
        movl    render_result_from_bake_h_seed_pass(%rip), %r8d
        movq    $-1, %rdx
        movq    render_result_from_bake_h_seed(%rip), %rsi
        .p2align 4
        .p2align 4
        .p2align 3
.L5:
        cmpl    %edi, %eax
        jle     .L9
        movq    %rdx, 4(%rcx)
        movl    %eax, %edi
.L9:
        testl   %r8d, %r8d
        je      .L5
        .p2align 4
        .p2align 4
        .p2align 3
.L6:
        movl    $0x00000000, (%rsi)
        cmpl    %edi, %eax
        jle     .L6
        movq    %rdx, 4(%rcx)
        movl    %eax, %edi
        jmp     .L6
.L11:
        ret
        .cfi_endproc
.LFE1:
        .size   render_result_from_bake_h, .-render_result_from_bake_h
        .ident  "GCC: (GNU) 16.0.0 20250717 (experimental)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-cfl-3 gcc]$


Referenced Bugs:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120003
[Bug 120003] [13/14 Regression] missed optimization around a loop with a
checker since r12-3453-g01b5038718056b

Reply via email to