Prabhuk updated this revision to Diff 558151.
Prabhuk added a comment.
Rebased the patchset and addressed the compilation failures
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D105907/new/
https://reviews.llvm.org/D105907
Files:
llvm/include/llvm/CodeGen/AsmPrinter.h
llvm/include/llvm/MC/MCObjectFileInfo.h
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
llvm/lib/MC/MCObjectFileInfo.cpp
llvm/test/CodeGen/call-graph-section.ll
Index: llvm/test/CodeGen/call-graph-section.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/call-graph-section.ll
@@ -0,0 +1,73 @@
+; Tests that we store the type identifiers in the .callgraph section of the binary.
+
+; RUN: llc --call-graph-section -filetype=obj -o - < %s | \
+; RUN: llvm-readelf -x .callgraph - | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; Callee with type metadata !4 ("_ZTSFvE.generalized"). @main calls it once
+; through a function pointer and once directly.
+define dso_local void @foo() #0 !type !4 {
+entry:
+ ret void
+}
+
+; Callee with type metadata !5 ("_ZTSFicE.generalized"). @main calls it once
+; through a function pointer and once directly.
+define dso_local i32 @bar(i8 signext %a) #0 !type !5 {
+entry:
+ %a.addr = alloca i8, align 1
+ store i8 %a, i8* %a.addr, align 1
+ ret i32 0
+}
+
+; Callee with type metadata !6 ("_ZTSFPvS_E.generalized"). @main calls it once
+; through a function pointer and once directly.
+define dso_local i32* @baz(i8* %a) #0 !type !6 {
+entry:
+ %a.addr = alloca i8*, align 8
+ store i8* %a, i8** %a.addr, align 8
+ ret i32* null
+}
+
+; Caller under test (type metadata !7, "_ZTSFiE.generalized"). It first calls
+; @foo, @bar and @baz through loaded function pointers, then calls the same
+; three functions directly. Every call carries a "type" operand bundle.
+define dso_local i32 @main() #0 !type !7 {
+entry:
+ %retval = alloca i32, align 4
+ %fp_foo = alloca void (...)*, align 8
+ %a = alloca i8, align 1
+ %fp_bar = alloca i32 (i8)*, align 8
+ %fp_baz = alloca i32* (i8*)*, align 8
+ store i32 0, i32* %retval, align 4
+ ; Indirect call to @foo through %fp_foo.
+ store void (...)* bitcast (void ()* @foo to void (...)*), void (...)** %fp_foo, align 8
+ %0 = load void (...)*, void (...)** %fp_foo, align 8
+ call void (...) %0() [ "type"(metadata !"_ZTSFvE.generalized") ]
+ ; Indirect call to @bar through %fp_bar.
+ store i32 (i8)* @bar, i32 (i8)** %fp_bar, align 8
+ %1 = load i32 (i8)*, i32 (i8)** %fp_bar, align 8
+ %2 = load i8, i8* %a, align 1
+ %call = call i32 %1(i8 signext %2) [ "type"(metadata !"_ZTSFicE.generalized") ]
+ ; Indirect call to @baz through %fp_baz.
+ store i32* (i8*)* @baz, i32* (i8*)** %fp_baz, align 8
+ %3 = load i32* (i8*)*, i32* (i8*)** %fp_baz, align 8
+ %call1 = call i32* %3(i8* %a) [ "type"(metadata !"_ZTSFPvS_E.generalized") ]
+ ; Direct calls to the same three callees.
+ call void @foo() [ "type"(metadata !"_ZTSFvE.generalized") ]
+ %4 = load i8, i8* %a, align 1
+ %call2 = call i32 @bar(i8 signext %4) [ "type"(metadata !"_ZTSFicE.generalized") ]
+ %call3 = call i32* @baz(i8* %a) [ "type"(metadata !"_ZTSFPvS_E.generalized") ]
+ ret i32 0
+}
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+; Check that the numeric type ids (MD5 hashes) of the type ids below are
+; emitted to the .callgraph section.
+
+; CHECK: Hex dump of section '.callgraph':
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{!"clang version 13.0.0 ([email protected]:llvm/llvm-project.git 6d35f403b91c2f2c604e23763f699d580370ca96)"}
+; CHECK-DAG: 2444f731 f5eecb3e
+!4 = !{i64 0, !"_ZTSFvE.generalized"}
+; CHECK-DAG: 5486bc59 814b8e30
+!5 = !{i64 0, !"_ZTSFicE.generalized"}
+; CHECK-DAG: 7ade6814 f897fd77
+!6 = !{i64 0, !"_ZTSFPvS_E.generalized"}
+; CHECK-DAG: caaf769a 600968fa
+!7 = !{i64 0, !"_ZTSFiE.generalized"}
Index: llvm/lib/MC/MCObjectFileInfo.cpp
===================================================================
--- llvm/lib/MC/MCObjectFileInfo.cpp
+++ llvm/lib/MC/MCObjectFileInfo.cpp
@@ -530,6 +530,8 @@
EHFrameSection =
Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
+ CallGraphSection = Ctx->getELFSection(".callgraph", ELF::SHT_PROGBITS, 0);
+
StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0);
PseudoProbeSection = Ctx->getELFSection(".pseudo_probe", DebugSecType, 0);
@@ -1112,6 +1114,24 @@
llvm_unreachable("Unknown ObjectFormatType");
}
+/// Returns the section that call graph metadata for code in \p TextSec is
+/// emitted to.
+///
+/// For non-ELF object files this is the shared CallGraphSection member. For
+/// ELF a ".callgraph" section is requested from the context, flagged
+/// SHF_LINK_ORDER, given the unique ID of \p TextSec and tied to its begin
+/// symbol; if \p TextSec belongs to a group, the returned section is placed in
+/// the same group.
+MCSection *
+MCObjectFileInfo::getCallGraphSection(const MCSection &TextSec) const {
+  const bool IsELF = Ctx->getObjectFileType() == MCContext::IsELF;
+  if (!IsELF)
+    return CallGraphSection;
+
+  const auto &TextELF = static_cast<const MCSectionELF &>(TextSec);
+
+  // Mirror the text section's group membership, if it has any.
+  StringRef Comdat;
+  unsigned SectionFlags = ELF::SHF_LINK_ORDER;
+  const MCSymbol *GroupSym = TextELF.getGroup();
+  if (GroupSym) {
+    SectionFlags |= ELF::SHF_GROUP;
+    Comdat = GroupSym->getName();
+  }
+
+  const auto *LinkedTo = cast<MCSymbolELF>(TextSec.getBeginSymbol());
+  return Ctx->getELFSection(".callgraph", ELF::SHT_PROGBITS, SectionFlags, 0,
+                            Comdat, true, TextELF.getUniqueID(), LinkedTo);
+}
+
MCSection *
MCObjectFileInfo::getStackSizesSection(const MCSection &TextSec) const {
if ((Ctx->getObjectFileType() != MCContext::IsELF) ||
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1493,6 +1493,105 @@
*StackUsageStream << "static\n";
}
+/// Computes the numeric call graph type id of \p F from its !type metadata.
+///
+/// Scans the function's MD_type nodes for the first two-operand node whose
+/// second operand is a string ending in "generalized" and returns the MD5 hash
+/// of that string as an i64 constant. When no such node exists, a warning is
+/// printed to errs() and null is returned.
+static ConstantInt *extractNumericCGTypeId(const Function &F) {
+  SmallVector<MDNode *, 2> TypeNodes;
+  F.getMetadata(LLVMContext::MD_type, TypeNodes);
+
+  MDString *GeneralizedId = nullptr;
+  for (MDNode *Node : TypeNodes) {
+    if (Node->getNumOperands() != 2)
+      continue;
+    auto *Name = dyn_cast<MDString>(Node->getOperand(1));
+    if (Name && Name->getString().endswith("generalized")) {
+      // The first matching node wins.
+      GeneralizedId = Name;
+      break;
+    }
+  }
+
+  if (GeneralizedId) {
+    const uint64_t Hash = llvm::MD5Hash(GeneralizedId->getString());
+    return cast<ConstantInt>(
+        ConstantInt::get(Type::getInt64Ty(F.getContext()), Hash));
+  }
+
+  errs() << "warning: can't find indirect target type id metadata "
+         << "for " << F.getName() << "\n";
+  return nullptr;
+}
+
+/// Emits the call graph section entry for \p MF. Does nothing unless the
+/// EmitCallGraphSection target option is set.
+///
+/// Entry layout, in emission order:
+///   1) format version (64-bit, currently 0)
+///   2) function entry pc (TM.getProgramPointerSize() bytes)
+///   3) function kind (64-bit FunctionInfo::FunctionKind value)
+///   4) function type id (64-bit, only for INDIRECT_TARGET_KNOWN_TID)
+///   5) number of callsite pairs (64-bit)
+///   6) one (64-bit type id, callsite label address) pair per callsite
+///
+/// \p FuncInfo supplies the callsite pairs; its list is cleared once emitted.
+void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
+                                      FunctionInfo &FuncInfo) {
+  if (!MF.getTarget().Options.EmitCallGraphSection)
+    return;
+
+  // Redirect output to the call graph section paired with the current
+  // section; the previous section is restored before returning.
+  MCSection *CGSection =
+      getObjFileLowering().getCallGraphSection(*getCurrentSection());
+  assert(CGSection && "null call graph section");
+  OutStreamer->pushSection();
+  OutStreamer->switchSection(CGSection);
+
+  // Format version number.
+  OutStreamer->emitInt64(0);
+
+  // Function entry pc.
+  OutStreamer->emitSymbolValue(getFunctionBegin(), TM.getProgramPointerSize());
+
+  // A function with non-local linkage, or one whose address is taken by
+  // something other than a callback use, could be called by anything.
+  const Function &F = MF.getFunction();
+  const bool MayBeIndirectTarget =
+      !F.hasLocalLinkage() || F.hasAddressTaken(nullptr,
+                                                /*IgnoreCallbackUses=*/true,
+                                                /*IgnoreAssumeLikeCalls=*/true,
+                                                /*IgnoreLLVMUsed=*/false);
+
+  // Function kind, followed by the type id when one is known.
+  // FIXME: FunctionKind only takes a few values but is emitted as a 64-bit
+  // value. Can be optimized to occupy 2 bits instead.
+  if (MayBeIndirectTarget) {
+    if (const ConstantInt *SelfTypeId = extractNumericCGTypeId(F)) {
+      OutStreamer->emitInt64(
+          FunctionInfo::FunctionKind::INDIRECT_TARGET_KNOWN_TID);
+      OutStreamer->emitInt64(SelfTypeId->getZExtValue());
+    } else {
+      OutStreamer->emitInt64(
+          FunctionInfo::FunctionKind::INDIRECT_TARGET_UNKNOWN_TID);
+    }
+  } else {
+    OutStreamer->emitInt64(FunctionInfo::FunctionKind::NOT_INDIRECT_TARGET);
+  }
+
+  // Callsite pair count, then each (type id, callsite pc) pair.
+  OutStreamer->emitInt64(FuncInfo.CallSiteLabels.size());
+  for (const auto &[SiteTypeId, SiteLabel] : FuncInfo.CallSiteLabels) {
+    OutStreamer->emitInt64(SiteTypeId);
+    OutStreamer->emitSymbolValue(SiteLabel, TM.getProgramPointerSize());
+  }
+  FuncInfo.CallSiteLabels.clear();
+
+  OutStreamer->popSection();
+}
+
void AsmPrinter::emitPCSectionsLabel(const MachineFunction &MF,
const MDNode &MD) {
MCSymbol *S = MF.getContext().createTempSymbol("pcsection");
@@ -1642,6 +1741,8 @@
bool IsEHa = MMI->getModule()->getModuleFlag("eh-asynch");
bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
+ FunctionInfo FuncInfo;
+ const auto &CallSitesInfoMap = MF->getCallSitesInfo();
for (auto &MBB : *MF) {
// Print a label for the basic block.
emitBasicBlockStart(MBB);
@@ -1755,6 +1856,26 @@
break;
}
+ // FIXME: Some indirect calls can get lowered to jump instructions,
+ // resulting in emitting labels for them. The extra information can
+ // be neglected while disassembling but still takes space in the binary.
+ if (TM.Options.EmitCallGraphSection && MI.isCall()) {
+ // Only indirect calls have type identifiers set.
+ const auto &CallSiteInfo = CallSitesInfoMap.find(&MI);
+ if (CallSiteInfo != CallSitesInfoMap.end()) {
+ if (auto *TypeId = CallSiteInfo->second.TypeId) {
+ // Emit label.
+ MCSymbol *S = MF->getContext().createTempSymbol();
+ OutStreamer->emitLabel(S);
+
+ // Get type id value.
+ uint64_t TypeIdVal = TypeId->getZExtValue();
+
+ // Add to function's callsite labels.
+ FuncInfo.CallSiteLabels.emplace_back(TypeIdVal, S);
+ }
+ }
+ }
// If there is a post-instruction symbol, emit a label for it here.
if (MCSymbol *S = MI.getPostInstrSymbol())
OutStreamer->emitLabel(S);
@@ -1930,6 +2051,9 @@
// Emit section containing stack size metadata.
emitStackSizeSection(*MF);
+ // Emit section containing call graph metadata.
+ emitCallGraphSection(*MF, FuncInfo);
+
// Emit .su file containing function stack size information.
emitStackUsage(*MF);
@@ -2486,7 +2610,8 @@
F.hasFnAttribute("function-instrument") ||
F.hasFnAttribute("xray-instruction-threshold") ||
needFuncLabels(MF) || NeedsLocalForSize ||
- MF.getTarget().Options.EmitStackSizeSection || MF.hasBBLabels()) {
+ MF.getTarget().Options.EmitStackSizeSection ||
+ MF.getTarget().Options.EmitCallGraphSection || MF.hasBBLabels()) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
CurrentFnSymForSize = CurrentFnBegin;
Index: llvm/include/llvm/MC/MCObjectFileInfo.h
===================================================================
--- llvm/include/llvm/MC/MCObjectFileInfo.h
+++ llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -68,6 +68,9 @@
/// Language Specific Data Area information is emitted to.
MCSection *LSDASection = nullptr;
+ /// Section containing metadata on call graph.
+ MCSection *CallGraphSection = nullptr;
+
/// If exception handling is supported by the target and the target can
/// support a compact representation of the CIE and FDE, this is the section
/// to emit them into.
@@ -352,6 +355,8 @@
MCSection *getFaultMapSection() const { return FaultMapSection; }
MCSection *getRemarksSection() const { return RemarksSection; }
+ MCSection *getCallGraphSection(const MCSection &TextSec) const;
+
MCSection *getStackSizesSection(const MCSection &TextSec) const;
MCSection *getBBAddrMapSection(const MCSection &TextSec) const;
Index: llvm/include/llvm/CodeGen/AsmPrinter.h
===================================================================
--- llvm/include/llvm/CodeGen/AsmPrinter.h
+++ llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CODEGEN_ASMPRINTER_H
#define LLVM_CODEGEN_ASMPRINTER_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -188,6 +189,32 @@
/// Emit comments in assembly output if this is true.
bool VerboseAsm;
+ /// Store symbols and type identifiers used to create call graph section
+ /// entries related to a function.
+ struct FunctionInfo {
+ /// Numeric type identifier used in call graph section for indirect calls
+ /// and targets.
+ using CGTypeId = uint64_t;
+
+ /// Enumeration of function kinds, and their mapping to function kind values
+ /// stored in call graph section entries.
+ /// Must match the enum in llvm/tools/llvm-objdump/llvm-objdump.cpp.
+ enum FunctionKind {
+ /// Function cannot be target to indirect calls.
+ NOT_INDIRECT_TARGET = 0,
+
+ /// Function may be target to indirect calls but its type id is unknown.
+ INDIRECT_TARGET_UNKNOWN_TID = 1,
+
+ /// Function may be target to indirect calls and its type id is known.
+ INDIRECT_TARGET_KNOWN_TID = 2,
+ };
+
+ /// List of (type identifier, callsite label) pairs. This is a sequence, not
+ /// a map, so the same type identifier may appear more than once. Labels are
+ /// only for indirect calls and inclusive of all indirect calls of the
+ /// function.
+ SmallVector<std::pair<CGTypeId, MCSymbol *>> CallSiteLabels;
+ };
+
/// Output stream for the stack usage file (i.e., .su file).
std::unique_ptr<raw_fd_ostream> StackUsageStream;
@@ -426,6 +453,8 @@
void emitKCFITrapEntry(const MachineFunction &MF, const MCSymbol *Symbol);
virtual void emitKCFITypeId(const MachineFunction &MF);
+ void emitCallGraphSection(const MachineFunction &MF, FunctionInfo &FuncInfo);
+
void emitPseudoProbe(const MachineInstr &MI);
void emitRemarksSection(remarks::RemarkStreamer &RS);
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits