| Issue |
179521
|
| Summary |
eraseFromParent not updating function entry count upon optimizing out call
|
| Labels |
new issue
|
| Assignees |
|
| Reporter |
zBION1C
|
In the following IR code, upon the removal of the call to the `rshift` function, the profile data with the entry count of the function is not updated. We found the problem to be rooted in the helper function `eraseFromParent()`, which fails to update profile metadata correctly.
We spotted the issue using the bisect tool and manual code review. Here we provide a minimized test input obtained by studying the `-O1` optimization of a larger profiled program. During that optimization, the inconsistency first occurs when `early-cse` is applied.
We reproduced this issue with commit [79eb804](https://github.com/llvm/llvm-project/commit/79eb804954bee06a62a3d31c697652f6dce982b1).
We provide an LLVM test to be run with `lit` that reproduces it, specifically by applying `early-cse`.
For full context, if the pass is disabled from the `-O1` optimization pipeline with `-opt-disable`, the issue disappears at that point, only to reappear later when `instcombine` is applied. Disabling that pass as well and repeating the analysis shows that the issue also affects `reassociate`, `bdce`, `adce` and `instsimplify`. All of these passes rely on the helper function `eraseFromParent()`.
```llvm
; RUN: opt < %s -passes="early-cse" -S -o - | FileCheck %s
; This checks that the function entry count metadata is updated correctly after
; early CSE erases instructions from func_10. The issue is caused by
; eraseFromParent not updating the function entry count metadata when a call
; is removed.
;CHECK: define internal fastcc signext range(i16 -32768, 8192) i16 @rshift(i16 noundef signext %arg, i32 noundef range(i32 2, 7) %arg1) unnamed_addr #1 !prof !35 !PGOFuncName !37 {
;CHECK: !35 = !{!"function_entry_count", i64 0}
; ModuleID = '/root/prog.ll'
source_filename = "/root/prog.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@a = dso_local local_unnamed_addr global [6 x [9 x i32]] zeroinitializer, align 16
; Function Attrs: inlinehint noinline nounwind uwtable
; @b: calls @func_10 with the constant arguments (i8 0, i16 -1), ignores the
; result, and returns undef. Its !prof attachment is !35.
define dso_local i32 @b() local_unnamed_addr #0 !prof !35 {
bb:
%i = call zeroext i8 @func_10(i8 noundef zeroext 0, i16 noundef signext -1)
ret i32 undef
}
; Function Attrs: inlinehint noinline nounwind uwtable
; @func_10: contains the two call sites of @rshift exercised by this test.
; %arg is never read; the function always returns undef.
define dso_local zeroext i8 @func_10(i8 noundef zeroext %arg, i16 noundef signext %arg1) local_unnamed_addr #0 !prof !35 {
bb:
; Branch to bb3 when %arg1 == 0, otherwise to bb2. The weights in !36 are
; 0 for the first successor (bb3) and 50 for the second (bb2).
%.not = icmp eq i16 %arg1, 0
br i1 %.not, label %bb3, label %bb2, !prof !36
bb2: ; preds = %bb
; The result %i of this @rshift call has no uses in this function.
; NOTE(review): presumably this is the call that gets erased -- confirm
; against the pass output.
%i = call fastcc signext i16 @rshift(i16 noundef signext %arg1, i32 noundef 6)
br label %bb7
bb3: ; preds = %bb
; @rshift(0, 2) feeds @sub (whose second argument is undef); the result is
; sign-extended to i32 and stored 8 bytes into the global @a.
%i4 = call fastcc signext i16 @rshift(i16 noundef signext 0, i32 noundef 2)
%i5 = call fastcc signext i16 @sub(i16 noundef signext %i4, i16 noundef signext undef)
%i6 = sext i16 %i5 to i32
store i32 %i6, ptr getelementptr inbounds nuw (i8, ptr @a, i64 8), align 8
br label %bb7
bb7: ; preds = %bb3, %bb2
ret i8 undef
}
; Function Attrs: inlinehint mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable
; @rshift: returns %arg unchanged when it is negative; otherwise zero-extends
; it to i32, logically shifts it right by %arg1, and truncates back to i16.
; This is the callee whose !prof entry count the test header refers to.
; NOTE(review): !35 is also attached to @b, @func_10 and @main in this file,
; so an updated count for @rshift alone would need its own metadata node --
; confirm the expected output.
define internal fastcc signext range(i16 -32768, 8192) i16 @rshift(i16 noundef signext %arg, i32 noundef range(i32 2, 7) %arg1) unnamed_addr #1 !prof !35 !PGOFuncName !37 {
bb:
; Negative inputs skip the shift and go straight to bb6. The weights in !38
; are 50 for the first successor (bb6) and 0 for the second (bb2).
%i = icmp slt i16 %arg, 0
br i1 %i, label %bb6, label %bb2, !prof !38
bb2: ; preds = %bb
%i3 = zext nneg i16 %arg to i32
%i4 = lshr i32 %i3, %arg1
%i5 = trunc nuw nsw i32 %i4 to i16
br label %bb6
bb6: ; preds = %bb2, %bb
; Select the shifted value (from bb2) or the original argument (from bb).
%i7 = phi i16 [ %i5, %bb2 ], [ %arg, %bb ]
ret i16 %i7
}
; Function Attrs: cold mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable
; @sub: returns its first argument unchanged; %arg1 is never read.
; Its !prof attachment is !39 (function_entry_count 0).
define internal fastcc noundef signext i16 @sub(i16 noundef returned signext %arg, i16 noundef signext %arg1) unnamed_addr #2 !prof !39 {
bb:
ret i16 %arg
}
; Function Attrs: inlinehint noinline nounwind uwtable
; @main: calls @b once, ignores its result, and returns 0.
define dso_local i32 @main() local_unnamed_addr #0 !prof !35 {
bb:
%i = call i32 @b()
ret i32 0
}
attributes #0 = { inlinehint noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { inlinehint mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { cold mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5}
!llvm.ident = !{!34}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{i32 1, !"ProfileSummary", !6}
!6 = !{!7, !8, !9, !10, !11, !12, !13, !14, !15, !16}
!7 = !{!"ProfileFormat", !"InstrProf"}
!8 = !{!"TotalCount", i64 200}
!9 = !{!"MaxCount", i64 50}
!10 = !{!"MaxInternalCount", i64 0}
!11 = !{!"MaxFunctionCount", i64 50}
!12 = !{!"NumCounts", i64 9}
!13 = !{!"NumFunctions", i64 5}
!14 = !{!"IsPartialProfile", i64 0}
!15 = !{!"PartialProfileRatio", double 0.000000e+00}
!16 = !{!"DetailedSummary", !17}
!17 = !{!18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33}
!18 = !{i32 10000, i64 50, i32 4}
!19 = !{i32 100000, i64 50, i32 4}
!20 = !{i32 200000, i64 50, i32 4}
!21 = !{i32 300000, i64 50, i32 4}
!22 = !{i32 400000, i64 50, i32 4}
!23 = !{i32 500000, i64 50, i32 4}
!24 = !{i32 600000, i64 50, i32 4}
!25 = !{i32 700000, i64 50, i32 4}
!26 = !{i32 800000, i64 50, i32 4}
!27 = !{i32 900000, i64 50, i32 4}
!28 = !{i32 950000, i64 50, i32 4}
!29 = !{i32 990000, i64 50, i32 4}
!30 = !{i32 999000, i64 50, i32 4}
!31 = !{i32 999900, i64 50, i32 4}
!32 = !{i32 999990, i64 50, i32 4}
!33 = !{i32 999999, i64 50, i32 4}
!34 = !{!"clang version 22.0.0git (https://github.com/llvm/llvm-project.git 79eb804954bee06a62a3d31c697652f6dce982b1)"}
!35 = !{!"function_entry_count", i64 50}
!36 = !{!"branch_weights", i32 0, i32 50}
!37 = !{!"/root/prog.c;rshift"}
!38 = !{!"branch_weights", i32 50, i32 0}
!39 = !{!"function_entry_count", i64 0}
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs