llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-debuginfo Author: Orlando Cazalet-Hyams (OCHyams) <details> <summary>Changes</summary> Interpret Key Instructions metadata to determine is_stmt placement. The lowest rank (highest precedence) instructions in each {InlinedAt, atomGroup} set are candidates for is_stmt. Only the last instruction in each set in a given block gets is_stmt. Calls always get is_stmt. --- Patch is 35.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133495.diff 8 Files Affected: - (modified) llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (+198-12) - (modified) llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h (+9) - (added) llvm/test/DebugInfo/KeyInstructions/X86/dwarf-basic-ranks.ll (+68) - (added) llvm/test/DebugInfo/KeyInstructions/X86/dwarf-basic.ll (+62) - (added) llvm/test/DebugInfo/KeyInstructions/X86/dwarf-buoy-multi-key.mir (+78) - (added) llvm/test/DebugInfo/KeyInstructions/X86/dwarf-buoy.mir (+66) - (added) llvm/test/DebugInfo/KeyInstructions/X86/dwarf-calls.ll (+117) - (added) llvm/test/DebugInfo/KeyInstructions/X86/dwarf-ranks-blocks.ll (+65) ``````````diff diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 39f1299a24e81..71abef1d2383b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -17,6 +17,7 @@ #include "DwarfExpression.h" #include "DwarfUnit.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" @@ -170,6 +171,9 @@ static cl::opt<DwarfDebug::MinimizeAddrInV5> MinimizeAddrInV5Option( "Stuff")), cl::init(DwarfDebug::MinimizeAddrInV5::Default)); +static cl::opt<bool> KeyInstructionsAreStmts("dwarf-use-key-instructions", + cl::Hidden, cl::init(false)); + static constexpr unsigned ULEB128PadSize = 4; void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) { @@ -2069,6 
+2073,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { unsigned LastAsmLine = Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine(); + bool IsKey = false; + if (KeyInstructionsAreStmts && DL && DL.getLine()) + IsKey = KeyInstructions.contains(MI); + if (!DL && MI == PrologEndLoc) { // In rare situations, we might want to place the end of the prologue // somewhere that doesn't have a source location already. It should be in @@ -2087,13 +2095,18 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { // If we have an ongoing unspecified location, nothing to do here. if (!DL) return; - // We have an explicit location, same as the previous location. - // But we might be coming back to it after a line 0 record. - if ((LastAsmLine == 0 && DL.getLine() != 0) || Flags) { - // Reinstate the source location but not marked as a statement. - RecordSourceLine(DL, Flags); + + // Skip this if the instruction is Key, else we might accidentally miss an + // is_stmt. + if (!IsKey) { + // We have an explicit location, same as the previous location. + // But we might be coming back to it after a line 0 record. + if ((LastAsmLine == 0 && DL.getLine() != 0) || Flags) { + // Reinstate the source location but not marked as a statement. + RecordSourceLine(DL, Flags); + } + return; } - return; } if (!DL) { @@ -2136,11 +2149,17 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT; PrologEndLoc = nullptr; } - // If the line changed, we call that a new statement; unless we went to - // line 0 and came back, in which case it is not a new statement. - unsigned OldLine = PrevInstLoc ? 
PrevInstLoc.getLine() : LastAsmLine; - if (DL.getLine() && (DL.getLine() != OldLine || ForceIsStmt)) - Flags |= DWARF2_FLAG_IS_STMT; + + if (KeyInstructionsAreStmts) { + if (IsKey) + Flags |= DWARF2_FLAG_IS_STMT; + } else { + // If the line changed, we call that a new statement; unless we went to + // line 0 and came back, in which case it is not a new statement. + unsigned OldLine = PrevInstLoc ? PrevInstLoc.getLine() : LastAsmLine; + if (DL.getLine() && (DL.getLine() != OldLine || ForceIsStmt)) + Flags |= DWARF2_FLAG_IS_STMT; + } RecordSourceLine(DL, Flags); @@ -2333,6 +2352,170 @@ DwarfDebug::emitInitialLocDirective(const MachineFunction &MF, unsigned CUID) { return PrologEndLoc; } +void DwarfDebug::findKeyInstructions(const MachineFunction *MF) { + // New function - reset KeyInstructions. + KeyInstructions.clear(); + + // The current candidate is_stmt instructions for each source atom. + // Map {(InlinedAt, Group): (Rank, Instructions)}. + DenseMap<std::pair<DILocation *, uint32_t>, + std::pair<uint16_t, SmallVector<const MachineInstr *>>> + GroupCandidates; + + // For each instruction: + // * Skip insts without DebugLoc, AtomGroup or AtomRank, and line zeros. + // * Check if insts in this group have been seen already in GroupCandidates. + // * If this instr rank is equal, add this instruction to KeyInstructions. + // Remove existing instructions from KeyInstructions if they have the + // same parent. + // * If this instr rank is higher (lower precedence), ignore it. + // * If this instr rank is lower (higher precedence), erase existing + // instructions from KeyInstructions. Add this instr to KeyInstructions. + + for (auto &MBB : *MF) { + // Rather than apply is_stmt directly to Key Instructions, we "float" + // is_stmt up to the 1st instruction with the same line number in a + // contiguous block. That instruction is called the "buoy". The + // buoy gets reset if we encounter an instruction with an atom + // group. 
+ const MachineInstr *Buoy = nullptr; + // The atom group number associated with Buoy which may be 0 if we haven't + // encountered an atom group yet in this blob of instructions with the same + // line number. + uint64_t BuoyAtom = 0; + + for (auto &MI : MBB) { + if (MI.isMetaInstruction()) + continue; + + if (!MI.getDebugLoc() || !MI.getDebugLoc().getLine()) + continue; + + // Reset the Buoy to this instruction if it has a different line number. + if (!Buoy || + Buoy->getDebugLoc().getLine() != MI.getDebugLoc().getLine()) { + Buoy = &MI; + BuoyAtom = 0; + } + + // Call instructions are handled specially - we always mark them as key + // regardless of atom info. + const auto &TII = + *MI.getParent()->getParent()->getSubtarget().getInstrInfo(); + if (MI.isCall() || TII.isTailCall(MI)) { + assert(MI.getDebugLoc() && "Unexpectedly missing DL"); + + // Calls are always key. + KeyInstructions.insert(Buoy); + + uint64_t Group = MI.getDebugLoc()->getAtomGroup(); + uint8_t Rank = MI.getDebugLoc()->getAtomRank(); + if (Group && Rank) { + auto *InlinedAt = MI.getDebugLoc()->getInlinedAt(); + auto &[CandidateRank, CandidateInsts] = GroupCandidates[{InlinedAt, Group}]; + + // This looks similar to the non-call handling code, except that + // we don't put the call into CandidateInsts so that they can't be + // made un-key. As a result, we also have to take special care not + // to erase the is_stmt from the buoy, and prevent that happening + // in the future. + + if (CandidateRank == Rank) { + // We've seen other instructions in this group of this rank. Discard + // ones we've seen in this block, keep the others. 
+ assert(!CandidateInsts.empty()); + SmallVector<const MachineInstr *> Insts; + Insts.reserve(CandidateInsts.size()); + for (auto &PrevInst : CandidateInsts) { + if (PrevInst->getParent() != MI.getParent()) + Insts.push_back(PrevInst); + else if (PrevInst != Buoy) + KeyInstructions.erase(PrevInst); + } + + if (Insts.empty()) { + CandidateInsts.clear(); + CandidateRank = 0; + } else { + CandidateInsts = std::move(Insts); + } + + } else if (CandidateRank > Rank) { + // We've seen other instructions in this group of lower precedence + // (higher rank). Discard them. + for (auto *Supplanted : CandidateInsts) { + // Don't erase the is_stmt we're using for this call. + if (Supplanted != Buoy) + KeyInstructions.erase(Supplanted); + } + CandidateInsts.clear(); + CandidateRank = 0; + } + } + + // Avoid floating any future is_stmts up to the call. + Buoy = nullptr; + continue; + } + + auto *InlinedAt = MI.getDebugLoc()->getInlinedAt(); + uint64_t Group = MI.getDebugLoc()->getAtomGroup(); + uint8_t Rank = MI.getDebugLoc()->getAtomRank(); + if (!Group || !Rank) + continue; + + // Don't let is_stmts float past instructions from different source atoms. + if (BuoyAtom && BuoyAtom != Group) { + Buoy = &MI; + BuoyAtom = MI.getDebugLoc()->getAtomGroup(); + } + + auto &[CandidateRank, CandidateInsts] = GroupCandidates[{InlinedAt, Group}]; + + if (CandidateRank == 0) { + // This is the first time we're seeing an instruction in this atom + // group. Add it to the map. + assert(CandidateInsts.empty()); + CandidateRank = Rank; + CandidateInsts.push_back(Buoy); + + } else if (CandidateRank == Rank) { + // We've seen other instructions in this group of this rank. Discard + // ones we've seen in this block, keep the others, add this one. 
+ assert(!CandidateInsts.empty()); + SmallVector<const MachineInstr *> Insts; + Insts.reserve(CandidateInsts.size() + 1); + for (auto &PrevInst : CandidateInsts) { + if (PrevInst->getParent() != MI.getParent()) + Insts.push_back(PrevInst); + else + KeyInstructions.erase(PrevInst); + } + Insts.push_back(Buoy); + CandidateInsts = std::move(Insts); + + } else if (CandidateRank > Rank) { + // We've seen other instructions in this group of lower precedence + // (higher rank). Discard them, add this one. + assert(!CandidateInsts.empty()); + CandidateRank = Rank; + for (auto *Supplanted : CandidateInsts) + KeyInstructions.erase(Supplanted); + CandidateInsts = {Buoy}; + + } else { + // We've seen other instructions in this group with higher precedence + // (lower rank). Discard this one. + assert(Rank != 0 && CandidateRank < Rank && CandidateRank != 0); + continue; + } + KeyInstructions.insert(Buoy); + assert(!BuoyAtom || BuoyAtom == MI.getDebugLoc()->getAtomGroup()); + BuoyAtom = MI.getDebugLoc()->getAtomGroup(); + } + } +} + /// For the function \p MF, finds the set of instructions which may represent a /// change in line number from one or more of the preceding MBBs. 
Stores the /// resulting set of instructions, which should have is_stmt set, in @@ -2491,7 +2674,10 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) { PrologEndLoc = emitInitialLocDirective( *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID()); - findForceIsStmtInstrs(MF); + if (KeyInstructionsAreStmts) + findKeyInstructions(MF); + else + findForceIsStmtInstrs(MF); } unsigned diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 58e6d39f76ae0..cd232a8e94ecf 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -464,6 +464,10 @@ class DwarfDebug : public DebugHandlerBase { }; private: + /// Instructions which should get is_stmt applied because they implement key + /// functionality for a source atom. + SmallDenseSet<const MachineInstr *> KeyInstructions; + /// Force the use of DW_AT_ranges even for single-entry range lists. MinimizeAddrInV5 MinimizeAddr = MinimizeAddrInV5::Disabled; @@ -701,6 +705,11 @@ class DwarfDebug : public DebugHandlerBase { void findForceIsStmtInstrs(const MachineFunction *MF); + /// Find instructions which should get is_stmt applied because they implement + /// key functionality for a source atom, store results in + /// DwarfDebug::KeyInstructions. + void findKeyInstructions(const MachineFunction *MF); + protected: /// Gather pre-function debug information. 
void beginFunctionImpl(const MachineFunction *MF) override; diff --git a/llvm/test/DebugInfo/KeyInstructions/X86/dwarf-basic-ranks.ll b/llvm/test/DebugInfo/KeyInstructions/X86/dwarf-basic-ranks.ll new file mode 100644 index 0000000000000..71ecf1dc41238 --- /dev/null +++ b/llvm/test/DebugInfo/KeyInstructions/X86/dwarf-basic-ranks.ll @@ -0,0 +1,68 @@ +; RUN: llc %s --filetype=obj -o - --dwarf-use-key-instructions \ +; RUN: | llvm-objdump -d - --no-show-raw-insn \ +; RUN: | FileCheck %s --check-prefix=OBJ + +; RUN: llc %s --filetype=obj -o - --dwarf-use-key-instructions \ +; RUN: | llvm-dwarfdump - --debug-line \ +; RUN: | FileCheck %s --check-prefix=DBG + +; OBJ: 0000000000000000 <_Z1fPiii>: +; OBJ-NEXT: 0: pushq %rbp +; OBJ-NEXT: 1: pushq %r14 +; OBJ-NEXT: 3: pushq %rbx +; OBJ-NEXT: 4: movl %edx, %ebx +; OBJ-NEXT: 6: movl %esi, %ebp +; OBJ-NEXT: 8: movq %rdi, %r14 +; OBJ-NEXT: b: callq 0x10 <_Z1fPiii+0x10> +; OBJ-NEXT: 10: addl %ebx, %ebp +; OBJ-NEXT: 12: movl %ebp, (%r14) +; OBJ-NEXT: 15: movl %ebp, %eax +; OBJ-NEXT: 17: popq %rbx +; OBJ-NEXT: 18: popq %r14 +; OBJ-NEXT: 1a: popq %rbp + +; DBG: Address Line Column File ISA Discriminator OpIndex Flags +; DBG-NEXT: ------------------ ------ ------ ------ --- ------------- ------- ------------- +; DBG-NEXT: 0x0000000000000000 3 0 0 0 0 0 is_stmt +; DBG-NEXT: 0x000000000000000b 4 0 0 0 0 0 is_stmt prologue_end +; DBG-NEXT: 0x0000000000000010 6 0 0 0 0 0 +; DBG-NEXT: 0x0000000000000012 5 0 0 0 0 0 is_stmt +; DBG-NEXT: 0x0000000000000015 7 0 0 0 0 0 is_stmt +; DBG-NEXT: 0x0000000000000017 7 0 0 0 0 0 epilogue_begin +; DBG-NEXT: 0x000000000000001c 7 0 0 0 0 0 end_sequence + +;; 1. [[gnu::nodebug]] void prologue_end(); +;; 2. +;; 3. int f(int *a, int b, int c) { +;; 4. prologue_end(); +;; 5. *a = +;; 6. b + c; +;; 7. return *a; +;; 8. } + +;; The add and store are in the same group (1). The add (line 6) has lower +;; precedence (rank 2) so should not get is_stmt applied. 
+target triple = "x86_64-unknown-linux-gnu" + +define hidden noundef i32 @_Z1fPiii(ptr %a, i32 %b, i32 %c) local_unnamed_addr !dbg !11 { +entry: + tail call void @_Z12prologue_endv(), !dbg !DILocation(line: 4, scope: !11) + %add = add nsw i32 %c, %b, !dbg !DILocation(line: 6, scope: !11, atomGroup: 1, atomRank: 2) + store i32 %add, ptr %a, align 4, !dbg !DILocation(line: 5, scope: !11, atomGroup: 1, atomRank: 1) + ret i32 %add, !dbg !DILocation(line: 7, scope: !11, atomGroup: 2, atomRank: 1) +} + +declare void @_Z12prologue_endv() local_unnamed_addr #1 + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_17, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.cpp", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!10 = !{!"clang version 19.0.0"} +!11 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !12, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!12 = !DISubroutineType(types: !13) +!13 = !{} diff --git a/llvm/test/DebugInfo/KeyInstructions/X86/dwarf-basic.ll b/llvm/test/DebugInfo/KeyInstructions/X86/dwarf-basic.ll new file mode 100644 index 0000000000000..e3b0184a837f8 --- /dev/null +++ b/llvm/test/DebugInfo/KeyInstructions/X86/dwarf-basic.ll @@ -0,0 +1,62 @@ +; RUN: llc %s --filetype=obj -o - --dwarf-use-key-instructions \ +; RUN: | llvm-objdump -d - --no-show-raw-insn \ +; RUN: | FileCheck %s --check-prefix=OBJ + +; RUN: llc %s --filetype=obj -o - --dwarf-use-key-instructions \ +; RUN: | llvm-dwarfdump - --debug-line \ +; RUN: | FileCheck %s --check-prefix=DBG + +; OBJ: 0000000000000000 <_Z1fi>: +; OBJ-NEXT: 0: leal 0x1(%rdi), %eax +; OBJ-NEXT: 3: retq +; OBJ: 0000000000000010 <_Z1gi>: 
+; OBJ-NEXT: 10: leal 0x1(%rdi), %eax +; OBJ-NEXT: 13: retq + +; DBG: Address Line Column File ISA Discriminator OpIndex Flags +; DBG-NEXT: ------------------ ------ ------ ------ --- ------------- ------- ------------- +; DBG-NEXT: 0x0000000000000000 2 0 0 0 0 0 is_stmt prologue_end +; DBG-NEXT: 0x0000000000000003 3 0 0 0 0 0 is_stmt +; DBG-NEXT: 0x0000000000000010 2 0 0 0 0 0 is_stmt prologue_end +; DBG-NEXT: 0x0000000000000013 6 0 0 0 0 0 is_stmt + +;; 1. int f(int a) { +;; 2. int x = a + 1; +;; 3. return x; +;; 4. } +;; 5. int g(int b) { +;; 6. return f(b); +;; 7. } +;; +;; Both functions contain 2 instructions in unique atom groups. In f we see +;; groups 1 and 3, and in g we see {!18, 1} and 1. All of these instructions +;; should get is_stmt. + +target triple = "x86_64-unknown-linux-gnu" + +define hidden noundef i32 @_Z1fi(i32 noundef %a) local_unnamed_addr !dbg !11 { +entry: + %add = add nsw i32 %a, 1, !dbg !DILocation(line: 2, scope: !11, atomGroup: 1, atomRank: 2) + ret i32 %add, !dbg !DILocation(line: 3, scope: !11, atomGroup: 3, atomRank: 1) +} + +define hidden noundef i32 @_Z1gi(i32 noundef %b) local_unnamed_addr !dbg !16 { +entry: + %add.i = add nsw i32 %b, 1, !dbg !DILocation(line: 2, scope: !11, inlinedAt: !18, atomGroup: 1, atomRank: 2) + ret i32 %add.i, !dbg !DILocation(line: 6, scope: !16, atomGroup: 1, atomRank: 1) +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_17, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.cpp", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!10 = !{!"clang version 19.0.0"} +!11 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !12, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: 
DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!12 = !DISubroutineType(types: !13) +!13 = !{} +!16 = distinct !DISubprogram(name: "g", scope: !1, file: !1, line: 5, type: !12, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!18 = distinct !DILocation(line: 6, scope: !16) diff --git a/llvm/test/DebugInfo/KeyInstructions/X86/dwarf-buoy-multi-key.mir b/llvm/test/DebugInfo/KeyInstructions/X86/dwarf-buoy-multi-key.mir new file mode 100644 index 0000000000000..c8459b4ced600 --- /dev/null +++ b/llvm/test/DebugInfo/KeyInstructions/X86/dwarf-buoy-multi-key.mir @@ -0,0 +1,78 @@ +# RUN: llc %s --start-after=livedebugvalues --dwarf-use-key-instructions --filetype=obj -o - \ +# RUN: | llvm-objdump -d - --no-show-raw-insn \ +# RUN: | FileCheck %s --check-prefix=OBJ + +# RUN: llc %s --start-after=livedebugvalues --dwarf-use-key-instructions --filetype=obj -o - \ +# RUN: | llvm-dwarfdump - --debug-line \ +# RUN: | FileCheck %s --check-prefix=DBG + +# OBJ: 0000000000000000 <_Z1fPiii>: +# OBJ-NEXT: 0: movl $0x0, %ebx +# OBJ-NEXT: 5: movl $0x1, %ebx +# OBJ-NEXT: a: movl $0x2, %ebx +# OBJ-NEXT: f: movl $0x3, %ebx +# OBJ-NEXT: 14: movl $0x4, %eax +# OBJ-NEXT: 19: movl $0x5, %eax +# OBJ-NEXT: 1e: movl $0x6, %eax +# OBJ-NEXT: 23: movl $0x7, %eax +# OBJ-NEXT: 28: retq + +# DBG: Address Line Column File ISA Discriminator OpIndex Flags +# DBG-NEXT: ------------------ ------ ------ ------ --- ------------- ------- ------------- +# DBG-NEXT: 0x0000000000000000 1 0 0 0 0 0 is_stmt prologue_end +# DBG-NEXT: 0x0000000000000005 2 0 0 0 0 0 is_stmt +# DBG-NEXT: 0x000000000000000a 2 0 0 0 0 0 +# DBG-NEXT: 0x000000000000000f 2 0 0 0 0 0 +# DBG-NEXT: 0x0000000000000014 2 0 0 0 0 0 +# DBG-NEXT: 0x0000000000000019 2 0 0 0 0 0 +# DBG-NEXT: 0x000000000000001e 2 0 0 0 ... 
[truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/133495 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits