Issue 165544
Summary Hazard when switching `gt` to `ge` when `+1` is involved
Labels new issue
Assignees
Reporter Validark
    [Zig Godbolt](https://zig.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:11,fontUsePx:'0',j:1,lang:zig,selection:(endColumn:44,endLineNumber:2,positionColumn:44,positionLineNumber:2,selectionStartColumn:44,selectionStartLineNumber:2,startColumn:44,startLineNumber:2),source:'export+fn+foo(a:+u32,+b:+u32,+c:+u32,+d:+u32)+u32+%7B%0A++++const+e+%3D+d+%2B+a+%2B+1%3B+//+add_plus_1(d,+a)%0A++++const+f+%3D+c+%3E%3D+e%3B%0A++++const+g+%3D+if+(f)+e+else+d%3B%0A++++const+h+%3D+b+%2B+@intFromBool(f)%3B%0A++++return+g+%5E+h%3B%0A%7D%0A%0Afn+add_plus_1(a:+u32,+b:+u32)+u32+%7B%0A++++return+asm+(%22leal+1(%25%5Ba%5D,+%25%5Bb%5D,+1),+%25%5Bret%5D%22%0A++++++++:+%5Bret%5D+%22%3Dr%22+(-%3E+u32),%0A++++++++:+%5Ba%5D+%22r%22+(a),%0A++++++++++%5Bb%5D+%22r%22+(b),%0A++++)%3B%0A%7D'),l:'5',n:'0',o:'Zig+source+%231',t:'0')),k:60.99191060755993,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:z0151,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:14,fontUsePx:'0',j:2,lang:zig,libs:!(),options:'-O+ReleaseFast+-target+x86_64-linux+-mcpu%3Dznver5+-fomit-frame-pointer',overrides:!(),selection:(endColumn:13,endLineNumber:3,positionColumn:13,positionLineNumber:3,selectionStartColumn:13,selectionStartLineNumber:3,startColumn:13,startLineNumber:3),source:1),l:'5',n:'0',o:'+zig+0.15.1+(Editor+%231)',t:'0')),k:39.008089392440084,l:'4',m:100,n:'0',o:'',s:0,t:'0')),l:'2',n:'0',o:'',t:'0')),version:4)
```zig
/// Reproducer: `e` (which ends in `+ 1`) is used both as the right-hand side
/// of an unsigned `>=` comparison and as one arm of the select on its result,
/// so a single `d + a + 1` computation can serve both uses.
export fn foo(a: u32, b: u32, c: u32, d: u32) u32 {
    const e = d + a + 1;
    // Unsigned `>=` against a value that ends in `+ 1`.
    const f = c >= e;
    // `e` is consumed again here, in addition to the comparison above.
    const g = if (f) e else d;
    // Adds 1 to `b` exactly when the comparison holds.
    const h = b + @intFromBool(f);
    return g ^ h;
}
```

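With Zig 0.15.1 (`-O ReleaseFast -target x86_64-linux -mcpu=znver5 -fomit-frame-pointer`), this currently emits: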

```asm
# Argument registers (SysV x86-64 convention): edi = a, esi = b, edx = c, ecx = d.
foo:
        lea     r8d, [rcx + rdi]        # r8d = d + a (temporary used only by the cmp below)
        lea     edi, [rcx + rdi + 1]    # edi = e = d + a + 1
        xor     eax, eax                # clear eax so seta produces a clean 0/1 value
        cmp     edx, r8d                # compare c with d + a
        seta    al                      # al = f = (c > d + a), unsigned
        cmovbe  edi, ecx                # if c <= d + a: edi = d, otherwise edi stays e
        add     eax, esi                # eax = h = b + f
        xor     eax, edi                # eax = g ^ h
        ret
```

Expected output (comparing `c` against `e` directly, so `d + a` does not need to be computed separately):

```asm
# Argument registers (SysV x86-64 convention): edi = a, esi = b, edx = c, ecx = d.
foo:
        lea     eax, [rcx + rdi + 1]    # eax = e = d + a + 1
        cmp     edx, eax                # compare c with e; CF = (c < e), unsigned
        cmovb   eax, ecx                # if c < e: eax = d, otherwise eax stays e
        sbb     esi, -1                 # esi = b + 1 - CF = b + (c >= e)
        xor     eax, esi                # eax = g ^ h
        ret
```

Unoptimized LLVM IR:

```llvm
; ModuleID = 'main'
source_filename = "main"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux6.15.1-gnu2.42.0"

@builtin.zig_backend = internal unnamed_addr constant i64 2, align 8
@start.simplified_logic = internal unnamed_addr constant i1 false, align 1
@builtin.output_mode = internal unnamed_addr constant i2 1, align 1
@builtin.link_mode = internal unnamed_addr constant i1 false, align 1

; Function Attrs: nounwind uwtable
; Unoptimized form of the reproducer. By parameter order: %0 = a, %1 = b, %2 = c, %3 = d.
define dso_local i32 @foo(i32 %0, i32 %1, i32 %2, i32 %3) #0 {
4:
  ; %6 = e = d + a + 1, built from two nuw adds.
  %5 = add nuw i32 %3, %0
  %6 = add nuw i32 %5, 1
  ; %7 = f = (c >= e), still an unsigned `uge` against %6.
  %7 = icmp uge i32 %2, %6
  br i1 %7, label %13, label %14

8:
  ; %9 = g: e when arriving from %13 (f true), d when arriving from %14 (f false).
  %9 = phi i32 [ %6, %13 ], [ %3, %14 ]
  ; %11 = h = b + zext(f).
  %10 = zext i1 %7 to i32
  %11 = add nuw i32 %1, %10
  %12 = xor i32 %9, %11
  ret i32 %12

13:
  br label %8

14:
  br label %8
}

attributes #0 = { nounwind uwtable "frame-pointer"="all" "target-cpu"="znver5" "target-features"="+64bit,+adx,+aes,+allow-light-256-bit,+avx,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vp2intersect,+avx512vpopcntdq,+avxvnni,+bmi,+bmi2,+branchfusion,+clflushopt,+clwb,+clzero,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fast-15bytenop,+fast-bextr,+fast-dpwssd,+fast-imm16,+fast-lzcnt,+fast-movbe,+fast-scalar-fsqrt,+fast-scalar-shift-masks,+fast-variable-perlane-shuffle,+fast-vector-fsqrt,+fma,+fsgsbase,+fsrm,+fxsr,+gfni,+idivq-to-divl,+invpcid,+lzcnt,+macrofusion,+mmx,+movbe,+movdir64b,+movdiri,+mwaitx,+nopl,+pclmul,+pku,+popcnt,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sbb-dep-breaking,+sha,+shstk,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+vaes,+vpclmulqdq,+vzeroupper,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,-16bit-mode,-32bit-mode,-amx-avx512,-amx-bf16,-amx-complex,-amx-fp16,-amx-fp8,-amx-int8,-amx-movrs,-amx-tf32,-amx-tile,-amx-transpose,-avx10.1-512,-avx10.2-512,-avx512fp16,-avxifma,-avxneconvert,-avxvnniint16,-avxvnniint8,-branch-hint,-ccmp,-cf,-cldemote,-cmpccxadd,-egpr,-enqcmd,-ermsb,-false-deps-getmant,-false-deps-lzcnt-tzcnt,-false-deps-mulc,-false-deps-mullq,-false-deps-perm,-false-deps-popcnt,-false-deps-range,-fast-11bytenop,-fast-7bytenop,-fast-gather,-fast-hops,-fast-shld-rotate,-fast-variable-crosslane-shuffle,-fast-vector-shift-masks,-faster-shift-than-shuffle,-fma4,-harden-sls-ijmp,-harden-sls-ret,-hreset,-idivl-to-divb,-inline-asm-use-gpr32,-kl,-lea-sp,-lea-uses-ag,-lvi-cfi,-lvi-load-hardening,-lwp,-movrs,-ndd,-nf,-no-bypass-delay,-no-bypass-delay-blend,-no-bypass-delay-mov,-no-bypass-delay-shuffle,-pad-short-functions,-pconfig,-ppx,-prefer-128-bit,-prefer-256-bit,-prefer-mask-registers,-prefer-movmsk-over-vtest,-prefer-no-gather,-prefer-no-scatter,-prefetchi,-ptwrite,-push2pop2,-raoint,-retpoline,-retpoline-external-thunk,-retpoline-indirect-branches
,-retpoline-indirect-calls,-rtm,-serialize,-seses,-sgx,-sha512,-slow-3ops-lea,-slow-incdec,-slow-lea,-slow-pmaddwd,-slow-pmulld,-slow-shld,-slow-two-mem-ops,-slow-unaligned-mem-16,-slow-unaligned-mem-32,-sm3,-sm4,-soft-float,-sse-unaligned-mem,-tagged-globals,-tbm,-tsxldtrk,-tuning-fast-imm-vector-shift,-uintr,-use-glm-div-sqrt-costs,-use-slm-arith-costs,-usermsr,-waitpkg,-widekl,-xop,-zu" }

!llvm.module.flags = !{}
```

Optimized LLVM IR:

```llvm
; ModuleID = 'BitcodeBuffer'
source_filename = "main"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux6.15.1-gnu2.42.0"

; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
; Optimized form of the reproducer. By parameter order: %0 = a, %1 = b, %2 = c, %3 = d.
define dso_local i32 @foo(i32 %0, i32 %1, i32 %2, i32 %3) local_unnamed_addr #0 {
  ; %5 = d + a, %6 = e = d + a + 1.
  %5 = add nuw i32 %3, %0
  %6 = add nuw i32 %5, 1
  ; The comparison is now `ugt` against %5 (d + a) instead of `uge` against %6 (e),
  ; so %5 is used by the icmp while %6 is still used by the select below.
  %7 = icmp ugt i32 %2, %5
  ; %. = g: e if the comparison holds, otherwise d.
  %. = select i1 %7, i32 %6, i32 %3
  ; %9 = h = b + zext(f).
  %8 = zext i1 %7 to i32
  %9 = add nuw i32 %1, %8
  %10 = xor i32 %9, %.
  ret i32 %10
}

attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "frame-pointer"="all" "target-cpu"="znver5" "target-features"="+64bit,+adx,+aes,+allow-light-256-bit,+avx,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vp2intersect,+avx512vpopcntdq,+avxvnni,+bmi,+bmi2,+branchfusion,+clflushopt,+clwb,+clzero,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fast-15bytenop,+fast-bextr,+fast-dpwssd,+fast-imm16,+fast-lzcnt,+fast-movbe,+fast-scalar-fsqrt,+fast-scalar-shift-masks,+fast-variable-perlane-shuffle,+fast-vector-fsqrt,+fma,+fsgsbase,+fsrm,+fxsr,+gfni,+idivq-to-divl,+invpcid,+lzcnt,+macrofusion,+mmx,+movbe,+movdir64b,+movdiri,+mwaitx,+nopl,+pclmul,+pku,+popcnt,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sbb-dep-breaking,+sha,+shstk,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+vaes,+vpclmulqdq,+vzeroupper,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,-16bit-mode,-32bit-mode,-amx-avx512,-amx-bf16,-amx-complex,-amx-fp16,-amx-fp8,-amx-int8,-amx-movrs,-amx-tf32,-amx-tile,-amx-transpose,-avx10.1-512,-avx10.2-512,-avx512fp16,-avxifma,-avxneconvert,-avxvnniint16,-avxvnniint8,-branch-hint,-ccmp,-cf,-cldemote,-cmpccxadd,-egpr,-enqcmd,-ermsb,-false-deps-getmant,-false-deps-lzcnt-tzcnt,-false-deps-mulc,-false-deps-mullq,-false-deps-perm,-false-deps-popcnt,-false-deps-range,-fast-11bytenop,-fast-7bytenop,-fast-gather,-fast-hops,-fast-shld-rotate,-fast-variable-crosslane-shuffle,-fast-vector-shift-masks,-faster-shift-than-shuffle,-fma4,-harden-sls-ijmp,-harden-sls-ret,-hreset,-idivl-to-divb,-inline-asm-use-gpr32,-kl,-lea-sp,-lea-uses-ag,-lvi-cfi,-lvi-load-hardening,-lwp,-movrs,-ndd,-nf,-no-bypass-delay,-no-bypass-delay-blend,-no-bypass-delay-mov,-no-bypass-delay-shuffle,-pad-short-functions,-pconfig,-ppx,-prefer-128-bit,-prefer-256-bit,-prefer-mask-registers,-prefer-movmsk-over-vtest,-prefer-no-gather,-prefer-no-scatter,-prefetchi,-ptwrite,-push2pop2,-raoint,-ret
poline,-retpoline-external-thunk,-retpoline-indirect-branches,-retpoline-indirect-calls,-rtm,-serialize,-seses,-sgx,-sha512,-slow-3ops-lea,-slow-incdec,-slow-lea,-slow-pmaddwd,-slow-pmulld,-slow-shld,-slow-two-mem-ops,-slow-unaligned-mem-16,-slow-unaligned-mem-32,-sm3,-sm4,-soft-float,-sse-unaligned-mem,-tagged-globals,-tbm,-tsxldtrk,-tuning-fast-imm-vector-shift,-uintr,-use-glm-div-sqrt-costs,-use-slm-arith-costs,-usermsr,-waitpkg,-widekl,-xop,-zu" }

!llvm.module.flags = !{}
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to