Issue 134736
Summary #108985 optimization doesn't remove strlen like loop body in some cases
Labels clang:bounds-safety
Assignees
Reporter delcypher
    In #108985 an optimization was added that tries to replace `strlen` like coding idioms with a call to `strlen`.

We ran into problems with this when this is optimization is used in `-fbounds-safety`.

For a test case like:

```
#include <ptrcheck.h>
int *__indexable convert_null_terminated_to_bounded_ptr(int *__null_terminated p) {
 return __unsafe_terminated_by_to_indexable(p);
}
```

the unoptimized IR looks like:

```
; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
define [2 x i64] @convert_null_terminated_to_bounded_ptr(ptr noundef %p) #0 {
entry:
  %retval = alloca %"__bounds_safety::wide_ptr.indexable", align 8
  %p.addr = alloca ptr, align 8
  %terminated_by.len = alloca i64, align 8
  store ptr %p, ptr %p.addr, align 8
  %0 = load ptr, ptr %p.addr, align 8
  store i64 0, ptr %terminated_by.len, align 8
  br label %terminated_by.loop_cond

terminated_by.loop_cond: ; preds = %terminated_by.loop_cont, %entry
  %terminated_by.len1 = load i64, ptr %terminated_by.len, align 8
  %1 = getelementptr inbounds i32, ptr %0, i64 %terminated_by.len1
  %terminated_by.elem = load i32, ptr %1, align 4
 %terminted_by.check_terminator = icmp eq i32 %terminated_by.elem, 0
  br i1 %terminted_by.check_terminator, label %terminated_by.loop_end, label %terminated_by.loop_cont

terminated_by.loop_cont: ; preds = %terminated_by.loop_cond
  %terminated_by.new_len = add i64 %terminated_by.len1, 1
  store i64 %terminated_by.new_len, ptr %terminated_by.len, align 8
  br label %terminated_by.loop_cond

terminated_by.loop_end: ; preds = %terminated_by.loop_cond
  %2 = load i64, ptr %terminated_by.len, align 8
  %3 = add i64 %2, 1
  %terminated_by.upper = getelementptr inbounds i32, ptr %0, i64 %3
  %4 = getelementptr inbounds nuw %"__bounds_safety::wide_ptr.indexable", ptr %retval, i32 0, i32 0
  store ptr %0, ptr %4, align 8
  %5 = getelementptr inbounds nuw %"__bounds_safety::wide_ptr.indexable", ptr %retval, i32 0, i32 1
  store ptr %terminated_by.upper, ptr %5, align 8
  %6 = load [2 x i64], ptr %retval, align 8
  ret [2 x i64] %6
}
```

If we optimize this with `-mllvm  -disable-loop-idiom-wcslen` the IR looks like

```
define [2 x i64] @convert_null_terminated_to_bounded_ptr(ptr noundef %p) local_unnamed_addr #0 {
entry:
  br label %terminated_by.loop_cond

terminated_by.loop_cond: ; preds = %terminated_by.loop_cond, %entry
  %terminated_by.len.0 = phi i64 [ 0, %entry ], [ %terminated_by.new_len, %terminated_by.loop_cond ]
  %0 = getelementptr inbounds i32, ptr %p, i64 %terminated_by.len.0
 %terminated_by.elem = load i32, ptr %0, align 4
 %terminted_by.check_terminator = icmp eq i32 %terminated_by.elem, 0
 %terminated_by.new_len = add i64 %terminated_by.len.0, 1
  br i1 %terminted_by.check_terminator, label %terminated_by.loop_end, label %terminated_by.loop_cond

terminated_by.loop_end: ; preds = %terminated_by.loop_cond
  %1 = getelementptr inbounds i32, ptr %p, i64 %terminated_by.len.0
  %terminated_by.upper = getelementptr i8, ptr %1, i64 4
  %2 = ptrtoint ptr %p to i64
  %.fca.0.insert = insertvalue [2 x i64] poison, i64 %2, 0
  %3 = ptrtoint ptr %terminated_by.upper to i64
 %.fca.1.insert = insertvalue [2 x i64] %.fca.0.insert, i64 %3, 1
  ret [2 x i64] %.fca.1.insert
}
```

Now if we optimize with the pass enabled the result looks like

```
; Function Attrs: nofree nounwind ssp memory(argmem: read) uwtable(sync)
define [2 x i64] @convert_null_terminated_to_bounded_ptr(ptr noundef %p) local_unnamed_addr #0 {
entry:
  %wcslen = tail call i64 @wcslen(ptr %p)
  br label %terminated_by.loop_cond

terminated_by.loop_cond: ; preds = %terminated_by.loop_cond, %entry
  %terminated_by.len.0 = phi i64 [ 0, %entry ], [ %terminated_by.new_len, %terminated_by.loop_cond ]
  %0 = getelementptr inbounds i32, ptr %p, i64 %terminated_by.len.0
 %terminated_by.elem = load i32, ptr %0, align 4
 %terminted_by.check_terminator = icmp eq i32 %terminated_by.elem, 0
 %terminated_by.new_len = add i64 %terminated_by.len.0, 1
  br i1 %terminted_by.check_terminator, label %terminated_by.loop_end, label %terminated_by.loop_cond

terminated_by.loop_end: ; preds = %terminated_by.loop_cond
  %1 = getelementptr inbounds i32, ptr %p, i64 %wcslen
  %terminated_by.upper = getelementptr i8, ptr %1, i64 4
 %2 = ptrtoint ptr %p to i64
  %.fca.0.insert = insertvalue [2 x i64] poison, i64 %2, 0
  %3 = ptrtoint ptr %terminated_by.upper to i64
  %.fca.1.insert = insertvalue [2 x i64] %.fca.0.insert, i64 %3, 1
  ret [2 x i64] %.fca.1.insert
}
```

This IR doesn't seem great because the call to `wcslen` is pointless because the loop body is still present. I would expect the perf of this code to be worse.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to