Issue 179521
Summary eraseFromParent not updating function entry count upon optimizing out call
Labels new issue
Assignees
Reporter zBION1C
    In the following IR, when the call to the `rshift` function is optimized out, the `function_entry_count` profile metadata of `rshift` is not updated. We traced the problem to the helper function `eraseFromParent()`, which does not update the profile metadata when the call is erased.

We found the issue using the bisect tool and manual code review; the minimized test input below comes from studying the `-O1` optimization of a larger profiled program. During that optimization, the inconsistency first appears when `early-cse` is applied.

We reproduced this issue with commit [79eb804](https://github.com/llvm/llvm-project/commit/79eb804954bee06a62a3d31c697652f6dce982b1).

We provide an LLVM test to be run with `lit` that reproduces it, specifically by applying `early-cse`.

For full context: if `early-cse` is disabled in the `-O1` optimization pipeline with `-opt-disable`, the issue disappears at that point, only to reappear later when `instcombine` is applied. Disabling that pass as well and repeating the analysis shows that the issue also affects `reassociate`, `bdce`, `adce` and `instsimplify`. All of these passes rely on the helper function `eraseFromParent()`.

```llvm
; RUN: opt < %s -passes="early-cse" -S -o - | FileCheck %s

; This checks that the function entry count metadata is updated correctly after
; early-cse erases instructions from func_10. The issue is caused by
; eraseFromParent not updating the function entry count metadata when the call
; is removed.
;CHECK: define internal fastcc signext range(i16 -32768, 8192) i16 @rshift(i16 noundef signext %arg, i32 noundef range(i32 2, 7) %arg1) unnamed_addr #1 !prof !35 !PGOFuncName !37 {
;CHECK: !35 = !{!"function_entry_count", i64 0}

; ModuleID = '/root/prog.ll'
source_filename = "/root/prog.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@a = dso_local local_unnamed_addr global [6 x [9 x i32]] zeroinitializer, align 16

; Function Attrs: inlinehint noinline nounwind uwtable
; Calls @func_10 with the constant arguments i8 0 and i16 -1, ignores the
; call's result, and returns undef. Its !prof attachment is !35.
define dso_local i32 @b() local_unnamed_addr #0 !prof !35 {
bb:
  %i = call zeroext i8 @func_10(i8 noundef zeroext 0, i16 noundef signext -1)
  ret i32 undef
}

; Function Attrs: inlinehint noinline nounwind uwtable
; Branches on whether %arg1 is zero and calls @rshift on both paths; %arg is
; never read and the function always returns undef. Its !prof attachment is !35.
define dso_local zeroext i8 @func_10(i8 noundef zeroext %arg, i16 noundef signext %arg1) local_unnamed_addr #0 !prof !35 {
bb:
  ; %arg1 == 0 goes to bb3, otherwise to bb2. The branch weights in !36 are
  ; 0 for the first successor (bb3) and 50 for the second (bb2).
  %.not = icmp eq i16 %arg1, 0
  br i1 %.not, label %bb3, label %bb2, !prof !36

bb2: ; preds = %bb
  ; The result %i of this call is not used anywhere in the function.
  ; NOTE(review): presumably this is the @rshift call that the report says
  ; early-cse erases -- confirm against the pass output.
  %i = call fastcc signext i16 @rshift(i16 noundef signext %arg1, i32 noundef 6)
  br label %bb7

bb3: ; preds = %bb
  ; Here the @rshift result is consumed: it is passed to @sub, sign-extended
  ; to i32, and stored 8 bytes past the start of @a.
  %i4 = call fastcc signext i16 @rshift(i16 noundef signext 0, i32 noundef 2)
  %i5 = call fastcc signext i16 @sub(i16 noundef signext %i4, i16 noundef signext undef)
  %i6 = sext i16 %i5 to i32
  store i32 %i6, ptr getelementptr inbounds nuw (i8, ptr @a, i64 8), align 8
  br label %bb7

bb7: ; preds = %bb3, %bb2
  ret i8 undef
}

; Function Attrs: inlinehint mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable
; Returns %arg unchanged when it is negative; otherwise returns %arg
; zero-extended to i32, logically shifted right by %arg1, and truncated back
; to i16. Carries the entry count !35 and the PGO function name !37; this is
; the function whose entry count the test's expectations refer to.
define internal fastcc signext range(i16 -32768, 8192) i16 @rshift(i16 noundef signext %arg, i32 noundef range(i32 2, 7) %arg1) unnamed_addr #1 !prof !35 !PGOFuncName !37 {
bb:
  ; Negative %arg skips the shift and goes straight to bb6. The branch weights
  ; in !38 are 50 for the first successor (bb6) and 0 for the second (bb2).
  %i = icmp slt i16 %arg, 0
  br i1 %i, label %bb6, label %bb2, !prof !38

bb2: ; preds = %bb
  ; Non-negative path: widen, shift right by %arg1, narrow back to i16.
  %i3 = zext nneg i16 %arg to i32
 %i4 = lshr i32 %i3, %arg1
  %i5 = trunc nuw nsw i32 %i4 to i16
  br label %bb6

bb6:                                              ; preds = %bb2, %bb
  ; Select the shifted value when coming from bb2, or the original %arg when
  ; coming directly from the entry block.
  %i7 = phi i16 [ %i5, %bb2 ], [ %arg, %bb ]
  ret i16 %i7
}

; Function Attrs: cold mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable
; Returns its first argument %arg unchanged; %arg1 is not read. Its !prof
; attachment is !39, an entry count of 0.
define internal fastcc noundef signext i16 @sub(i16 noundef returned signext %arg, i16 noundef signext %arg1) unnamed_addr #2 !prof !39 {
bb:
  ret i16 %arg
}

; Function Attrs: inlinehint noinline nounwind uwtable
; Calls @b, ignores its result, and returns 0. Its !prof attachment is !35.
define dso_local i32 @main() local_unnamed_addr #0 !prof !35 {
bb:
  %i = call i32 @b()
  ret i32 0
}

attributes #0 = { inlinehint noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { inlinehint mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { cold mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

!llvm.module.flags = !{!0, !1, !2, !3, !4, !5}
!llvm.ident = !{!34}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{i32 1, !"ProfileSummary", !6}
!6 = !{!7, !8, !9, !10, !11, !12, !13, !14, !15, !16}
!7 = !{!"ProfileFormat", !"InstrProf"}
!8 = !{!"TotalCount", i64 200}
!9 = !{!"MaxCount", i64 50}
!10 = !{!"MaxInternalCount", i64 0}
!11 = !{!"MaxFunctionCount", i64 50}
!12 = !{!"NumCounts", i64 9}
!13 = !{!"NumFunctions", i64 5}
!14 = !{!"IsPartialProfile", i64 0}
!15 = !{!"PartialProfileRatio", double 0.000000e+00}
!16 = !{!"DetailedSummary", !17}
!17 = !{!18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33}
!18 = !{i32 10000, i64 50, i32 4}
!19 = !{i32 100000, i64 50, i32 4}
!20 = !{i32 200000, i64 50, i32 4}
!21 = !{i32 300000, i64 50, i32 4}
!22 = !{i32 400000, i64 50, i32 4}
!23 = !{i32 500000, i64 50, i32 4}
!24 = !{i32 600000, i64 50, i32 4}
!25 = !{i32 700000, i64 50, i32 4}
!26 = !{i32 800000, i64 50, i32 4}
!27 = !{i32 900000, i64 50, i32 4}
!28 = !{i32 950000, i64 50, i32 4}
!29 = !{i32 990000, i64 50, i32 4}
!30 = !{i32 999000, i64 50, i32 4}
!31 = !{i32 999900, i64 50, i32 4}
!32 = !{i32 999990, i64 50, i32 4}
!33 = !{i32 999999, i64 50, i32 4}
!34 = !{!"clang version 22.0.0git (https://github.com/llvm/llvm-project.git 79eb804954bee06a62a3d31c697652f6dce982b1)"}
; Entry count 50, shared as the !prof attachment of @b, @func_10, @rshift and @main.
!35 = !{!"function_entry_count", i64 50}
; Branch weights for the conditional branch in @func_10 (bb3: 0, bb2: 50).
!36 = !{!"branch_weights", i32 0, i32 50}
; PGO function name attached to @rshift.
!37 = !{!"/root/prog.c;rshift"}
; Branch weights for the conditional branch in @rshift (bb6: 50, bb2: 0).
!38 = !{!"branch_weights", i32 50, i32 0}
; Entry count 0, used as the !prof attachment of @sub.
!39 = !{!"function_entry_count", i64 0}
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to