Issue 151791
Summary `assume`-`nonnull` operand bundles should optimize away more
Labels new issue
Assignees
Reporter scottmcm
    Today in rust we emit certain conversions using `assume(icmp ne %p, null)`: <https://rust.godbolt.org/z/WTGj3Ks9q> <https://github.com/rust-lang/rust/blob/63f6845e570305a92eaf855897768617366164d6/tests/codegen-llvm/intrinsics/transmute.rs#L380-L388>
```llvm
define { ptr, i64 } @check_pair_to_dst_ref(i64 noundef %x.0, i64 noundef %x.1) unnamed_addr {
start:
  %_0.0 = getelementptr i8, ptr null, i64 %x.0
  %0 = icmp ne ptr %_0.0, null
  call void @llvm.assume(i1 %0)
  %1 = insertvalue { ptr, i64 } poison, ptr %_0.0, 0
  %2 = insertvalue { ptr, i64 } %1, i64 %x.1, 1
  ret { ptr, i64 } %2
}
```

But since I hear that the extra uses from such `icmp`s can sometimes make optimization worse, I wanted to move to [assume operand bundles](https://llvm.org/docs/LangRef.html#assume-operand-bundles) instead. I made that change, and now get what I think is the correct output:

```llvm
define { ptr, i64 } @check_pair_to_dst_ref(i64 noundef %x.0, i64 noundef %x.1) unnamed_addr #0 {
start:
  %_0.0 = getelementptr i8, ptr null, i64 %x.0
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %_0.0) ]
  %0 = insertvalue { ptr, i64 } poison, ptr %_0.0, 0
  %1 = insertvalue { ptr, i64 } %0, i64 %x.1, 1
  ret { ptr, i64 } %1
}
```

But that turned out to have bad consequences.  For example, what used to be a quite-good <https://rust.godbolt.org/z/vhY16Kavc>

```llvm
define void @long_integer_map(ptr dead_on_unwind noalias nocapture noundef writable writeonly sret([2048 x i8]) align 4 dereferenceable(2048) %_0, ptr noalias nocapture noundef readonly align 4 dereferenceable(2048) %x) unnamed_addr personality ptr @rust_eh_personality {
start:
  %array.i.i.i.i = alloca [2048 x i8], align 4
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %start ], [ %index.next, %vector.body ]
  %offset.idx = shl i64 %index, 2
  %next.gep = getelementptr i8, ptr %x, i64 %offset.idx
  %0 = getelementptr i8, ptr %next.gep, i64 16
  %wide.load = load <4 x i32>, ptr %next.gep, align 4
  %wide.load1 = load <4 x i32>, ptr %0, align 4
  %1 = mul <4 x i32> %wide.load, splat (i32 13)
  %2 = mul <4 x i32> %wide.load1, splat (i32 13)
  %3 = add <4 x i32> %1, splat (i32 7)
  %4 = add <4 x i32> %2, splat (i32 7)
  %5 = getelementptr inbounds nuw i32, ptr %array.i.i.i.i, i64 %index
  %6 = getelementptr inbounds nuw i8, ptr %5, i64 16
  store <4 x i32> %3, ptr %5, align 4
  store <4 x i32> %4, ptr %6, align 4
 %index.next = add nuw i64 %index, 8
  %7 = icmp eq i64 %index.next, 512
 br i1 %7, label %core::array::drain::drain_array_with::h75d8f8b0fda7bb41.exit, label %vector.body

core::array::drain::drain_array_with::h75d8f8b0fda7bb41.exit:
 call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(2048) %_0, ptr noundef nonnull align 4 dereferenceable(2048) %array.i.i.i.i, i64 2048, i1 false)
  ret void
}
```

now never has *any* of the superfluous-after-inlining `assume`s removed, giving this obviously-silly IR:

```llvm
; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite, inaccessiblemem: write) uwtable
define void @long_integer_map(ptr dead_on_unwind noalias nocapture noundef writable writeonly sret([2048 x i8]) align 4 dereferenceable(2048) %_0, ptr noalias nocapture noundef readonly align 4 dereferenceable(2048) %x) unnamed_addr #1 personality ptr @__CxxFrameHandler3 {
start:
  %array.i.i.i.i = alloca [2048 x i8], align 4
  %array1.i = alloca [2048 x i8], align 4
  call void @llvm.lifetime.start.p0(i64 2048, ptr nonnull %array1.i), !noalias !7
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(2048) %array1.i, ptr noundef nonnull readonly align 4 dereferenceable(2048) %x, i64 2048, i1 false), !noalias !11
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %array1.i) ]
  %0 = getelementptr inbounds nuw i8, ptr %array1.i, i64 2048
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %array1.i) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  %1 = getelementptr inbounds nuw i8, ptr %array1.i, i64 2048
  br label %vector.body

vector.body: ; preds = %vector.body, %start
  %index = phi i64 [ 0, %start ], [ %index.next, %vector.body ]
  %offset.idx = shl i64 %index, 2
  %2 = or disjoint i64 %offset.idx, 4
  %3 = or disjoint i64 %offset.idx, 8
  %4 = or disjoint i64 %offset.idx, 12
  %5 = or disjoint i64 %offset.idx, 16
  %6 = or disjoint i64 %offset.idx, 20
  %7 = or disjoint i64 %offset.idx, 24
  %8 = or disjoint i64 %offset.idx, 28
 %next.gep = getelementptr i8, ptr %array1.i, i64 %offset.idx
  %next.gep1 = getelementptr i8, ptr %array1.i, i64 %2
  %next.gep2 = getelementptr i8, ptr %array1.i, i64 %3
  %next.gep3 = getelementptr i8, ptr %array1.i, i64 %4
 %next.gep4 = getelementptr i8, ptr %array1.i, i64 %5
  %next.gep5 = getelementptr i8, ptr %array1.i, i64 %6
  %next.gep6 = getelementptr i8, ptr %array1.i, i64 %7
  %next.gep7 = getelementptr i8, ptr %array1.i, i64 %8
 call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep1) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep2) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep3) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep4) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep5) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep6) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep7) ]
  %9 = getelementptr inbounds nuw i8, ptr %next.gep, i64 4
  %10 = getelementptr inbounds nuw i8, ptr %next.gep1, i64 4
  %11 = getelementptr inbounds nuw i8, ptr %next.gep2, i64 4
  %12 = getelementptr inbounds nuw i8, ptr %next.gep3, i64 4
  %13 = getelementptr inbounds nuw i8, ptr %next.gep4, i64 4
  %14 = getelementptr inbounds nuw i8, ptr %next.gep5, i64 4
  %15 = getelementptr inbounds nuw i8, ptr %next.gep6, i64 4
  %16 = getelementptr inbounds nuw i8, ptr %next.gep7, i64 4
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %9) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %10) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %11) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %12) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %13) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %14) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %15) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %16) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep1) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep2) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep3) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep4) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep5) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep6) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep7) ]
  %17 = getelementptr i8, ptr %next.gep, i64 16
  %wide.load = load <4 x i32>, ptr %next.gep, align 4, !noalias !12
  %wide.load8 = load <4 x i32>, ptr %17, align 4, !noalias !12
  %18 = mul <4 x i32> %wide.load, splat (i32 13)
  %19 = mul <4 x i32> %wide.load8, splat (i32 13)
  %20 = add <4 x i32> %18, splat (i32 7)
  %21 = add <4 x i32> %19, splat (i32 7)
  %22 = getelementptr inbounds nuw i32, ptr %array.i.i.i.i, i64 %index
  %23 = getelementptr inbounds nuw i8, ptr %22, i64 16
  store <4 x i32> %20, ptr %22, align 4
  store <4 x i32> %21, ptr %23, align 4
  %index.next = add nuw i64 %index, 8
  %24 = icmp eq i64 %index.next, 512
  br i1 %24, label %_ZN4core5array5drain16drain_array_with17hdab83ed713860683E.exit, label %vector.body, !llvm.loop !27

_ZN4core5array5drain16drain_array_with17hdab83ed713860683E.exit: ; preds = %vector.body
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %1) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %1) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  call void @llvm.lifetime.end.p0(i64 2048, ptr nonnull %array1.i), !noalias !7
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(2048) %_0, ptr noundef nonnull align 4 dereferenceable(2048) %array.i.i.i.i, i64 2048, i1 false)
  ret void
}
```

Trunk can clean that up a bit, but the result is still full of unnecessary `assume`s: <https://llvm.godbolt.org/z/b8oM1vxvT>

At a minimum, this ought to be treated idempotently, since there's no need for
```llvm
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
```
repeated in a row like that.  But it'd also be nice to optimize out all the ones that came from `GEP nuw`, for example.

---

(Or if this is the wrong way to do this, that'd be good to know and reflect in the langref too.)

_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to