Issue 142497
Summary missed optimization for ceiling division with known ranges
Labels new issue
Assignees
Reporter alex
    My starting point was the following Rust code (meaning is hopefully clear even to non-rust speakers):
```
pub fn src(v: u32) -> u32 {
    (u32::BITS - v.leading_zeros()).div_ceil(8)
}
```

rustc emits the following LLVM IR:

```
define noundef range(i32 0, 6) i32 @src(i32 noundef %v) unnamed_addr #0 {
start:
  %0 = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %v, i1 false)
  %_2 = sub nuw nsw i32 32, %0
  %_41 = lshr i32 %_2, 3
  %_5 = and i32 %_2, 7
  %_6.not = icmp ne i32 %_5, 0
  %1 = zext i1 %_6.not to i32
  %_0.sroa.0.0 = add nuw nsw i32 %_41, %1
  ret i32 %_0.sroa.0.0
}
```

Which emits the following x86:

```
src: # @src
        mov     ecx, 63
        bsr ecx, edi
        xor     ecx, -32
        add     ecx, 33
        mov eax, ecx
        shr     eax, 3
        and     ecx, 7
        cmp ecx, 1
        sbb     eax, -1
        ret
```

however, this could be validly optimized to the following LLVM-IR:

```
define noundef range(i32 0, 5) i32 @tgt(i32 noundef %v) unnamed_addr #0 {
start:
  %0 = tail call i32 @llvm.ctlz.i32(i32 %v, i1 false)
  %1 = sub nuw nsw i32 32, %0
  %2 = add nuw nsw i32 %1, 7
  %3 = lshr i32 %2, 3
  ret i32 %3
}
```

which produces the following, much tighter x86:

```
tgt: # @tgt
        mov     eax, 63
        bsr     eax, edi
 xor     eax, -32
        add     eax, 40
        shr     eax, 3
 ret
```

alive2 showing that the transformation is valid: https://alive2.llvm.org/ce/z/Ys4qAy

(As a bit of interest, I found the optimized versions using claude. Computers are wild: https://claude.ai/share/d998511d-45ee-4132-bee4-fe7f70350a67)


_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to