Issue |
134736
|
Summary |
#108985 optimization doesn't remove strlen like loop body in some cases
|
Labels |
clang:bounds-safety
|
Assignees |
|
Reporter |
delcypher
|
In #108985 an optimization was added that tries to replace `strlen` like coding idioms with a call to `strlen`.
We ran into problems with this when this is optimization is used in `-fbounds-safety`.
For a test case like:
```
#include <ptrcheck.h>
int *__indexable convert_null_terminated_to_bounded_ptr(int *__null_terminated p) {
return __unsafe_terminated_by_to_indexable(p);
}
```
the unoptimized IR looks like:
```
; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
define [2 x i64] @convert_null_terminated_to_bounded_ptr(ptr noundef %p) #0 {
entry:
%retval = alloca %"__bounds_safety::wide_ptr.indexable", align 8
%p.addr = alloca ptr, align 8
%terminated_by.len = alloca i64, align 8
store ptr %p, ptr %p.addr, align 8
%0 = load ptr, ptr %p.addr, align 8
store i64 0, ptr %terminated_by.len, align 8
br label %terminated_by.loop_cond
terminated_by.loop_cond: ; preds = %terminated_by.loop_cont, %entry
%terminated_by.len1 = load i64, ptr %terminated_by.len, align 8
%1 = getelementptr inbounds i32, ptr %0, i64 %terminated_by.len1
%terminated_by.elem = load i32, ptr %1, align 4
%terminted_by.check_terminator = icmp eq i32 %terminated_by.elem, 0
br i1 %terminted_by.check_terminator, label %terminated_by.loop_end, label %terminated_by.loop_cont
terminated_by.loop_cont: ; preds = %terminated_by.loop_cond
%terminated_by.new_len = add i64 %terminated_by.len1, 1
store i64 %terminated_by.new_len, ptr %terminated_by.len, align 8
br label %terminated_by.loop_cond
terminated_by.loop_end: ; preds = %terminated_by.loop_cond
%2 = load i64, ptr %terminated_by.len, align 8
%3 = add i64 %2, 1
%terminated_by.upper = getelementptr inbounds i32, ptr %0, i64 %3
%4 = getelementptr inbounds nuw %"__bounds_safety::wide_ptr.indexable", ptr %retval, i32 0, i32 0
store ptr %0, ptr %4, align 8
%5 = getelementptr inbounds nuw %"__bounds_safety::wide_ptr.indexable", ptr %retval, i32 0, i32 1
store ptr %terminated_by.upper, ptr %5, align 8
%6 = load [2 x i64], ptr %retval, align 8
ret [2 x i64] %6
}
```
If we optimize this with `-mllvm -disable-loop-idiom-wcslen` the IR looks like
```
define [2 x i64] @convert_null_terminated_to_bounded_ptr(ptr noundef %p) local_unnamed_addr #0 {
entry:
br label %terminated_by.loop_cond
terminated_by.loop_cond: ; preds = %terminated_by.loop_cond, %entry
%terminated_by.len.0 = phi i64 [ 0, %entry ], [ %terminated_by.new_len, %terminated_by.loop_cond ]
%0 = getelementptr inbounds i32, ptr %p, i64 %terminated_by.len.0
%terminated_by.elem = load i32, ptr %0, align 4
%terminted_by.check_terminator = icmp eq i32 %terminated_by.elem, 0
%terminated_by.new_len = add i64 %terminated_by.len.0, 1
br i1 %terminted_by.check_terminator, label %terminated_by.loop_end, label %terminated_by.loop_cond
terminated_by.loop_end: ; preds = %terminated_by.loop_cond
%1 = getelementptr inbounds i32, ptr %p, i64 %terminated_by.len.0
%terminated_by.upper = getelementptr i8, ptr %1, i64 4
%2 = ptrtoint ptr %p to i64
%.fca.0.insert = insertvalue [2 x i64] poison, i64 %2, 0
%3 = ptrtoint ptr %terminated_by.upper to i64
%.fca.1.insert = insertvalue [2 x i64] %.fca.0.insert, i64 %3, 1
ret [2 x i64] %.fca.1.insert
}
```
Now if we optimize with the pass enabled the result looks like
```
; Function Attrs: nofree nounwind ssp memory(argmem: read) uwtable(sync)
define [2 x i64] @convert_null_terminated_to_bounded_ptr(ptr noundef %p) local_unnamed_addr #0 {
entry:
%wcslen = tail call i64 @wcslen(ptr %p)
br label %terminated_by.loop_cond
terminated_by.loop_cond: ; preds = %terminated_by.loop_cond, %entry
%terminated_by.len.0 = phi i64 [ 0, %entry ], [ %terminated_by.new_len, %terminated_by.loop_cond ]
%0 = getelementptr inbounds i32, ptr %p, i64 %terminated_by.len.0
%terminated_by.elem = load i32, ptr %0, align 4
%terminted_by.check_terminator = icmp eq i32 %terminated_by.elem, 0
%terminated_by.new_len = add i64 %terminated_by.len.0, 1
br i1 %terminted_by.check_terminator, label %terminated_by.loop_end, label %terminated_by.loop_cond
terminated_by.loop_end: ; preds = %terminated_by.loop_cond
%1 = getelementptr inbounds i32, ptr %p, i64 %wcslen
%terminated_by.upper = getelementptr i8, ptr %1, i64 4
%2 = ptrtoint ptr %p to i64
%.fca.0.insert = insertvalue [2 x i64] poison, i64 %2, 0
%3 = ptrtoint ptr %terminated_by.upper to i64
%.fca.1.insert = insertvalue [2 x i64] %.fca.0.insert, i64 %3, 1
ret [2 x i64] %.fca.1.insert
}
```
This IR doesn't seem great because the call to `wcslen` is pointless because the loop body is still present. I would expect the perf of this code to be worse.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs