https://github.com/rafaelauler created https://github.com/llvm/llvm-project/pull/81346
Add a class that allows a process to introspect or investigate itself by disassembling its memory contents just-in-time with BOLT. An example is shown in a new unittest binary. This leverages the new ability to use BOLT as a library instead of as a regular executable that processes input binaries, demonstrating how to use BOLT as a library. >From 1167a59a185cbc113d6ca30f223e09be1abc8494 Mon Sep 17 00:00:00 2001 From: Rafael Auler <rafaelau...@fb.com> Date: Thu, 8 Feb 2024 19:57:14 -0800 Subject: [PATCH] [BOLT] Add binary introspection/JIT manager Add a class that allows a process to introspect or investigate itself by disassembling its memory contents just-in-time with BOLT. An example is shown in a new unittest binary. This leverages the new ability to use BOLT as a library instead of as a regular executable that processes input binaries, demonstrating how to use BOLT as a library. --- bolt/include/bolt/Core/BinaryContext.h | 9 +- bolt/include/bolt/Core/BinaryFunction.h | 1 + bolt/include/bolt/Core/BinarySection.h | 3 +- .../include/bolt/Rewrite/JITRewriteInstance.h | 105 +++++ bolt/lib/Core/BinaryContext.cpp | 39 +- bolt/lib/Rewrite/CMakeLists.txt | 1 + bolt/lib/Rewrite/DWARFRewriter.cpp | 2 +- bolt/lib/Rewrite/JITRewriteInstance.cpp | 367 ++++++++++++++++++ bolt/lib/Rewrite/MachORewriteInstance.cpp | 35 +- bolt/lib/Rewrite/RewriteInstance.cpp | 19 +- bolt/unittests/CMakeLists.txt | 1 + bolt/unittests/Core/BinaryContext.cpp | 4 +- bolt/unittests/Core/MCPlusBuilder.cpp | 4 +- bolt/unittests/Rewrite/CMakeLists.txt | 27 ++ bolt/unittests/Rewrite/JITRewriteInstance.cpp | 99 +++++ 15 files changed, 654 insertions(+), 62 deletions(-) create mode 100644 bolt/include/bolt/Rewrite/JITRewriteInstance.h create mode 100644 bolt/lib/Rewrite/JITRewriteInstance.cpp create mode 100644 bolt/unittests/Rewrite/CMakeLists.txt create mode 100644 bolt/unittests/Rewrite/JITRewriteInstance.cpp diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h 
index 30336c4e3a74fe..e5af8685d9b0d0 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -212,9 +212,6 @@ class BinaryContext { /// input file to internal section representation. DenseMap<SectionRef, BinarySection *> SectionRefToBinarySection; - /// Low level section registration. - BinarySection &registerSection(BinarySection *Section); - /// Store all functions in the binary, sorted by original address. std::map<uint64_t, BinaryFunction> BinaryFunctions; @@ -265,7 +262,8 @@ class BinaryContext { public: static Expected<std::unique_ptr<BinaryContext>> - createBinaryContext(const ObjectFile *File, bool IsPIC, + createBinaryContext(Triple TheTriple, StringRef InputFileName, + SubtargetFeatures *Features, bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger); @@ -1049,6 +1047,9 @@ class BinaryContext { BinarySection &registerSection(const Twine &SectionName, const BinarySection &OriginalSection); + /// Low level section registration. + BinarySection &registerSection(BinarySection *Section); + /// Register or update the information for the section with the given /// /p Name. If the section already exists, the information in the /// section will be updated with the new data. 
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index a177178769e456..68c626d9a2f452 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -655,6 +655,7 @@ class BinaryFunction { BinaryFunction(const BinaryFunction &) = delete; friend class MachORewriteInstance; + friend class JITRewriteInstance; friend class RewriteInstance; friend class BinaryContext; friend class DataReader; diff --git a/bolt/include/bolt/Core/BinarySection.h b/bolt/include/bolt/Core/BinarySection.h index a85dbf28950e31..d4f9b5955b2029 100644 --- a/bolt/include/bolt/Core/BinarySection.h +++ b/bolt/include/bolt/Core/BinarySection.h @@ -50,7 +50,7 @@ class BinarySection { std::string Name; // Section name const SectionRef Section; // SectionRef for input binary sections. StringRef Contents; // Input section contents - const uint64_t Address; // Address of section in input binary (may be 0) + uint64_t Address; // Address of section in input binary (may be 0) const uint64_t Size; // Input section size uint64_t InputFileOffset{0}; // Offset in the input binary unsigned Alignment; // alignment in bytes (must be > 0) @@ -461,6 +461,7 @@ class BinarySection { uint32_t getIndex() const { return Index; } // mutation + void setAddress(uint64_t Address) { this->Address = Address; } void setOutputAddress(uint64_t Address) { OutputAddress = Address; } void setOutputFileOffset(uint64_t Offset) { OutputFileOffset = Offset; } void setSectionID(StringRef ID) { diff --git a/bolt/include/bolt/Rewrite/JITRewriteInstance.h b/bolt/include/bolt/Rewrite/JITRewriteInstance.h new file mode 100644 index 00000000000000..9833845deaa3d6 --- /dev/null +++ b/bolt/include/bolt/Rewrite/JITRewriteInstance.h @@ -0,0 +1,105 @@ +//===- bolt/Rewrite/JITRewriteInstance.h - in-memory rewriter ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Interface to control BOLT as JIT library +// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_JIT_REWRITE_REWRITE_INSTANCE_H +#define BOLT_JIT_REWRITE_REWRITE_INSTANCE_H + +#include "bolt/Utils/NameResolver.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/StringSaver.h" +#include <memory> + +namespace llvm { + +namespace object { +class ObjectFile; +} + +namespace bolt { + +class BinaryContext; +class ProfileReaderBase; +struct JournalingStreams; + +/// Allows a process to introspect itself by running BOLT to disassemble +/// its own address space. +class JITRewriteInstance { + std::unique_ptr<BinaryContext> BC; + NameResolver NR; + StringSaver StrPool; + BumpPtrAllocator StrAllocator; + std::unique_ptr<ProfileReaderBase> ProfileReader; + + void adjustCommandLineOptions(); + Error preprocessProfileData(); + Error processProfileDataPreCFG(); + Error processProfileData(); + Error disassembleFunctions(); + Error buildFunctionsCFG(); + void postProcessFunctions(); + JITRewriteInstance(JournalingStreams Logger, bool IsPIC, Error &Err); + +public: + /// Create BOLT data structures/interface to deal with disassembly. Logger + /// contains the streams used for BOLT to report events (regular or errors) + /// that might happen while BOLT is trying to reconstruct a function from + /// binary level. + static Expected<std::unique_ptr<JITRewriteInstance>> + createJITRewriteInstance(JournalingStreams Logger, bool IsPIC); + ~JITRewriteInstance(); + + /// This is the main entry point used to make BOLT aware of a fragment of + /// memory space in the process. 
The user might need to reconstruct the + /// original ELF type/flags, such as using SHT_PROGBITS to inform + /// this is an allocatable region and flags SHF_ALLOC | SHF_EXECINSTR to + /// flag a section containing code. + void registerJITSection(StringRef Name, uint64_t Address, StringRef Data, + unsigned Alignment, unsigned ELFType, + unsigned ELFFlags); + + /// Communicate to BOLT the boundaries of a function in a section of memory + /// previously registered with registerJITSection. + void registerJITFunction(StringRef Name, uintptr_t Addr, size_t Size); + + /// In case the user is using LLVM as an in-process JIT, and the user has + /// access over the ObjectFile instance loaded in memory, instead of using + /// registerJITSection/registerJITFunction pair, the user can just forward + /// that object here and JITRewriteInstance will read this object and call + /// registerJITSection/registerJITFunction the appropriate number of times + /// to map this object to BOLT. + Error notifyObjectLoaded(const object::ObjectFile &Obj); + + /// Mark all functions added so far as non-simple, so BOLT will skip them. + void disableAllFunctions(); + + /// Mark a specific function as simple, so BOLT will try to disassemble it. + void processFunctionContaining(uint64_t Address); + + /// Supply a profile file for BOLT to attach edge counts to the disassembled + /// functions. + Error setProfile(StringRef FileName); + + /// Run all the necessary steps to disassemble registered sections and + /// functions (process what we have so far). 
+ Error run(); + + /// Print all BOLT's processed functions + void printAll(raw_ostream &OS); + + /// Print a specific function processed by BOLT + void printFunctionContaining(raw_ostream &OS, uint64_t Address); +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index d544ece13a832f..9a2780e8f3cecc 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -162,28 +162,30 @@ BinaryContext::~BinaryContext() { /// Create BinaryContext for a given architecture \p ArchName and /// triple \p TripleName. -Expected<std::unique_ptr<BinaryContext>> -BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, - std::unique_ptr<DWARFContext> DwCtx, - JournalingStreams Logger) { +Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext( + Triple TheTriple, StringRef InputFileName, SubtargetFeatures *Features, + bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) { StringRef ArchName = ""; std::string FeaturesStr = ""; - switch (File->getArch()) { + switch (TheTriple.getArch()) { case llvm::Triple::x86_64: + if (Features) + return createFatalBOLTError( + "x86_64 target does not use SubtargetFeatures"); ArchName = "x86-64"; FeaturesStr = "+nopl"; break; case llvm::Triple::aarch64: + if (Features) + return createFatalBOLTError( + "AArch64 target does not use SubtargetFeatures"); ArchName = "aarch64"; FeaturesStr = "+all"; break; case llvm::Triple::riscv64: { ArchName = "riscv64"; - Expected<SubtargetFeatures> Features = File->getFeatures(); - - if (auto E = Features.takeError()) - return std::move(E); - + if (!Features) + return createFatalBOLTError("RISCV target needs SubtargetFeatures"); // We rely on relaxation for some transformations (e.g., promoting all calls // to PseudoCALL and then making JITLink relax them). Since the relax // feature is not stored in the object file, we manually enable it. 
@@ -196,12 +198,11 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, "BOLT-ERROR: Unrecognized machine in ELF file"); } - auto TheTriple = std::make_unique<Triple>(File->makeTriple()); - const std::string TripleName = TheTriple->str(); + const std::string TripleName = TheTriple.str(); std::string Error; const Target *TheTarget = - TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); + TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error); if (!TheTarget) return createStringError(make_error_code(std::errc::not_supported), Twine("BOLT-ERROR: ", Error)); @@ -240,13 +241,13 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, Twine("BOLT-ERROR: no instruction info for target ", TripleName)); std::unique_ptr<MCContext> Ctx( - new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); + new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get())); std::unique_ptr<MCObjectFileInfo> MOFI( TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); Ctx->setObjectFileInfo(MOFI.get()); // We do not support X86 Large code model. Change this in the future. bool Large = false; - if (TheTriple->getArch() == llvm::Triple::aarch64) + if (TheTriple.getArch() == llvm::Triple::aarch64) Large = true; unsigned LSDAEncoding = Large ? 
dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; @@ -273,7 +274,7 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); std::unique_ptr<MCInstPrinter> InstructionPrinter( - TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, + TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo, *MII, *MRI)); if (!InstructionPrinter) return createStringError( @@ -285,8 +286,8 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, TheTarget->createMCCodeEmitter(*MII, *Ctx)); auto BC = std::make_unique<BinaryContext>( - std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, - std::string(TripleName), std::move(MCE), std::move(MOFI), + std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple), + TheTarget, std::string(TripleName), std::move(MCE), std::move(MOFI), std::move(AsmInfo), std::move(MII), std::move(STI), std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), std::move(DisAsm), Logger); @@ -296,7 +297,7 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, BC->MAB = std::unique_ptr<MCAsmBackend>( BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); - BC->setFilename(File->getFileName()); + BC->setFilename(InputFileName); BC->HasFixedLoadAddress = !IsPIC; diff --git a/bolt/lib/Rewrite/CMakeLists.txt b/bolt/lib/Rewrite/CMakeLists.txt index 6890f52e2b28bb..0bb80fbeef277e 100644 --- a/bolt/lib/Rewrite/CMakeLists.txt +++ b/bolt/lib/Rewrite/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_library(LLVMBOLTRewrite DWARFRewriter.cpp ExecutableFileMemoryManager.cpp JITLinkLinker.cpp + JITRewriteInstance.cpp LinuxKernelRewriter.cpp MachORewriteInstance.cpp MetadataManager.cpp diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index 27fa937c7508c3..6fb9e9fafd0e38 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ 
b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -1652,7 +1652,7 @@ namespace { std::unique_ptr<BinaryContext> createDwarfOnlyBC(const object::ObjectFile &File) { return cantFail(BinaryContext::createBinaryContext( - &File, false, + File.makeTriple(), File.getFileName(), nullptr, false, DWARFContext::create(File, DWARFContext::ProcessDebugRelocations::Ignore, nullptr, "", WithColor::defaultErrorHandler, WithColor::defaultWarningHandler), diff --git a/bolt/lib/Rewrite/JITRewriteInstance.cpp b/bolt/lib/Rewrite/JITRewriteInstance.cpp new file mode 100644 index 00000000000000..51537d261f0359 --- /dev/null +++ b/bolt/lib/Rewrite/JITRewriteInstance.cpp @@ -0,0 +1,367 @@ +//===- bolt/Rewrite/JITRewriteInstance.cpp - JIT rewriter -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "bolt/Rewrite/JITRewriteInstance.h" +#include "bolt/Core/BinaryContext.h" +#include "bolt/Core/BinaryEmitter.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/JumpTable.h" +#include "bolt/Core/MCPlusBuilder.h" +#include "bolt/Profile/DataAggregator.h" +#include "bolt/Rewrite/BinaryPassManager.h" +#include "bolt/Rewrite/RewriteInstance.h" +#include "bolt/Utils/Utils.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/Object/SymbolSize.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileSystem.h" +#include <memory> + +namespace opts { + +using namespace llvm; +extern cl::opt<unsigned> AlignText; +extern cl::opt<bool> PrintSections; +extern cl::opt<bool> PrintDisasm; +extern cl::opt<bool> PrintCFG; +extern cl::opt<unsigned> Verbosity; +} // namespace opts + +namespace llvm { +namespace bolt { + +#define DEBUG_TYPE "bolt" + +Expected<std::unique_ptr<JITRewriteInstance>> 
+JITRewriteInstance::createJITRewriteInstance(JournalingStreams Logger, + bool IsPIC) { + Error Err = Error::success(); + std::unique_ptr<JITRewriteInstance> JITRI( + new JITRewriteInstance(Logger, IsPIC, Err)); + if (Err) + return std::move(Err); + return std::move(JITRI); +} + +JITRewriteInstance::JITRewriteInstance(JournalingStreams Logger, bool IsPIC, + Error &Err) + : StrPool(StrAllocator) { + ErrorAsOutParameter EAO(&Err); + Triple TheTriple(sys::getDefaultTargetTriple().c_str()); + + auto BCOrErr = BinaryContext::createBinaryContext( + TheTriple, StringRef("JIT input file"), nullptr, IsPIC, nullptr, Logger); + if (Error E = BCOrErr.takeError()) { + Err = std::move(E); + return; + } + BC = std::move(BCOrErr.get()); + BC->initializeTarget(std::unique_ptr<MCPlusBuilder>( + createMCPlusBuilder(BC->TheTriple->getArch(), BC->MIA.get(), + BC->MII.get(), BC->MRI.get(), BC->STI.get()))); + BC->FirstAllocAddress = 0; + BC->LayoutStartAddress = 0xffffffffffffffff; +} + +JITRewriteInstance::~JITRewriteInstance() {} + +void JITRewriteInstance::adjustCommandLineOptions() { + if (!opts::AlignText.getNumOccurrences()) + opts::AlignText = BC->PageAlign; +} + +Error JITRewriteInstance::preprocessProfileData() { + if (!ProfileReader) + return Error::success(); + if (Error E = ProfileReader->preprocessProfile(*BC.get())) + return Error(std::move(E)); + return Error::success(); +} + +Error JITRewriteInstance::processProfileDataPreCFG() { + if (!ProfileReader) + return Error::success(); + if (Error E = ProfileReader->readProfilePreCFG(*BC.get())) + return Error(std::move(E)); + return Error::success(); +} + +Error JITRewriteInstance::processProfileData() { + if (!ProfileReader) + return Error::success(); + if (Error E = ProfileReader->readProfile(*BC.get())) + return Error(std::move(E)); + return Error::success(); +} + +Error JITRewriteInstance::disassembleFunctions() { + for (auto &BFI : BC->getBinaryFunctions()) { + BinaryFunction &Function = BFI.second; + if 
(!Function.isSimple()) + continue; + if (Error E = Function.disassemble()) + return Error(std::move(E)); + if (opts::PrintDisasm) + Function.print(BC->outs(), "after disassembly"); + } + return Error::success(); +} + +Error JITRewriteInstance::buildFunctionsCFG() { + for (auto &BFI : BC->getBinaryFunctions()) { + BinaryFunction &Function = BFI.second; + if (!Function.isSimple()) + continue; + if (auto NewE = handleErrors( + Function.buildCFG(/*AllocId*/ 0), [&](const BOLTError &E) -> Error { + if (E.isFatal()) + return Error(std::make_unique<BOLTError>(std::move(E))); + if (!E.getMessage().empty()) + E.log(BC->errs()); + return Error::success(); + })) { + return Error(std::move(NewE)); + } + } + return Error::success(); +} + +void JITRewriteInstance::postProcessFunctions() { + for (auto &BFI : BC->getBinaryFunctions()) { + BinaryFunction &Function = BFI.second; + if (Function.empty() || !Function.isSimple()) + continue; + Function.postProcessCFG(); + if (opts::PrintCFG) + Function.print(outs(), "after building cfg"); + } +} + +void JITRewriteInstance::registerJITSection(StringRef Name, uint64_t Address, + StringRef Data, unsigned Alignment, + unsigned ELFType, + unsigned ELFFlags) { + auto *Sec = + new BinarySection(*BC, Name, const_cast<uint8_t *>(Data.bytes_begin()), + Data.size(), Alignment, ELFType, ELFFlags); + Sec->setAddress(Address); + BC->registerSection(Sec); +} + +void JITRewriteInstance::registerJITFunction(StringRef Name, uintptr_t Addr, + size_t Size) { + if (ErrorOr<BinarySection &> Sec = BC->getSectionForAddress(Addr)) + BC->createBinaryFunction(Name.str(), *Sec, Addr, Size); +} + +Error JITRewriteInstance::notifyObjectLoaded(const object::ObjectFile &Obj) { + for (const object::SectionRef &Section : Obj.sections()) { + Expected<StringRef> SectionName = Section.getName(); + if (Error E = SectionName.takeError()) + return Error(std::move(E)); + // Only register sections with names. 
+ if (SectionName->empty()) + continue; + + StringRef UniqueSectionName = StrPool.save(NR.uniquify(*SectionName)); + unsigned ELFType = ELFSectionRef(Section).getType(); + unsigned ELFFlags = ELFSectionRef(Section).getFlags(); + if (ELFType == ELF::SHT_NOBITS) + continue; + + const uint64_t Address = Section.getAddress(); + const uint64_t Size = Section.getSize(); + StringRef Contents = + StringRef(reinterpret_cast<const char *>(Address), Size); + if (Contents.empty()) + continue; + + this->registerJITSection(UniqueSectionName, Section.getAddress(), Contents, + Section.getAlignment().value(), ELFType, ELFFlags); + LLVM_DEBUG( + dbgs() << "BOLT-DEBUG: registering section " << *SectionName << " @ 0x" + << Twine::utohexstr(Section.getAddress()) << ":0x" + << Twine::utohexstr(Section.getAddress() + Section.getSize()) + << "\n"); + } + + if (opts::PrintSections) { + BC->outs() << "BOLT-INFO: Sections from original binary:\n"; + BC->printSections(BC->outs()); + } + + std::vector<SymbolRef> FunctionSymbols; + for (const SymbolRef &S : Obj.symbols()) { + auto TypeOrErr = S.getType(); + if (Error E = TypeOrErr.takeError()) + return Error(std::move(E)); + SymbolRef::Type Type = *TypeOrErr; + if (Type == SymbolRef::ST_Function) + FunctionSymbols.push_back(S); + } + + if (FunctionSymbols.empty()) + return Error::success(); + + Error SortErrors = Error::success(); + llvm::stable_sort(FunctionSymbols, [&](const SymbolRef &LHS, + const SymbolRef &RHS) { + auto LHSAddrOrErr = LHS.getAddress(); + auto RHSAddrOrErr = RHS.getAddress(); + if (auto E = + joinErrors(LHSAddrOrErr.takeError(), RHSAddrOrErr.takeError())) { + SortErrors = joinErrors(std::move(SortErrors), std::move(E)); + return false; + } + return *LHSAddrOrErr < *RHSAddrOrErr; + }); + if (SortErrors) + return Error(std::move(SortErrors)); + + for (size_t Index = 0; Index < FunctionSymbols.size(); ++Index) { + auto AddrOrErr = FunctionSymbols[Index].getAddress(); + if (auto E = AddrOrErr.takeError()) + return 
Error(std::move(E)); + + const uint64_t Address = *AddrOrErr; + ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); + if (!Section) + continue; + + auto NameOrErr = FunctionSymbols[Index].getName(); + auto FlagsOrErr = FunctionSymbols[Index].getFlags(); + auto SecOrErr = FunctionSymbols[Index].getSection(); + if (auto E = joinErrors( + joinErrors(NameOrErr.takeError(), FlagsOrErr.takeError()), + SecOrErr.takeError())) + return Error(std::move(E)); + std::string SymbolName = NameOrErr->str(); + // Uniquify names of local symbols. + if (!(*FlagsOrErr & SymbolRef::SF_Global)) + SymbolName = NR.uniquify(SymbolName); + + section_iterator S = *SecOrErr; + uint64_t EndAddress = S->getAddress() + S->getSize(); + + size_t NFIndex = Index + 1; + // Skip aliases. + auto NextAddrOrErr = FunctionSymbols[NFIndex].getAddress(); + if (auto E = NextAddrOrErr.takeError()) + return Error(std::move(E)); + uint64_t NextAddr = *NextAddrOrErr; + while (NFIndex < FunctionSymbols.size() && NextAddr == Address) { + ++NFIndex; + auto NFAddrOrErr = FunctionSymbols[NFIndex].getAddress(); + if (auto E = NFAddrOrErr.takeError()) + return Error(std::move(E)); + NextAddr = *NFAddrOrErr; + } + + auto NFSecOrErr = FunctionSymbols[NFIndex].getSection(); + if (auto E = NFSecOrErr.takeError()) + return Error(std::move(E)); + if (NFIndex < FunctionSymbols.size() && S == *NFSecOrErr) { + auto EndAddressOrErr = FunctionSymbols[NFIndex].getAddress(); + if (auto E = EndAddressOrErr.takeError()) + return Error(std::move(E)); + EndAddress = *EndAddressOrErr; + } + + const uint64_t SymbolSize = EndAddress - Address; + const auto It = BC->getBinaryFunctions().find(Address); + if (It == BC->getBinaryFunctions().end()) { + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating binary function for " + << SymbolName << "\n"); + BC->createBinaryFunction(std::move(SymbolName), *Section, Address, + SymbolSize); + } else { + It->second.addAlternativeName(std::move(SymbolName)); + } + } + + for (auto &BFI : 
BC->getBinaryFunctions()) { + BinaryFunction &Function = BFI.second; + Function.setMaxSize(Function.getSize()); + + ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData(); + if (!FunctionData) { + BC->errs() << "BOLT-ERROR: corresponding section is non-executable or " + << "empty for function " << Function << '\n'; + continue; + } + + if (Function.getSize() == 0) + Function.setSimple(false); + } + + return Error::success(); +} + +void JITRewriteInstance::disableAllFunctions() { + for (auto &BFI : BC->getBinaryFunctions()) { + BinaryFunction &Function = BFI.second; + Function.setSimple(false); + } +} + +void JITRewriteInstance::processFunctionContaining(uint64_t Address) { + if (BinaryFunction *Func = BC->getBinaryFunctionContainingAddress(Address)) + Func->setSimple(true); +} + +Error JITRewriteInstance::setProfile(StringRef Filename) { + if (!sys::fs::exists(Filename)) + return errorCodeToError(make_error_code(errc::no_such_file_or_directory)); + + ProfileReader = std::make_unique<DataAggregator>(Filename); + return Error::success(); +} + +Error JITRewriteInstance::run() { + adjustCommandLineOptions(); + + if (Error E = preprocessProfileData()) + return Error(std::move(E)); + + if (Error E = disassembleFunctions()) + return Error(std::move(E)); + + if (Error E = processProfileDataPreCFG()) + return Error(std::move(E)); + + if (Error E = buildFunctionsCFG()) + return Error(std::move(E)); + + if (Error E = processProfileData()) + return Error(std::move(E)); + + postProcessFunctions(); + + return Error::success(); +} + +void JITRewriteInstance::printAll(raw_ostream &OS) { + for (auto &BFI : BC->getBinaryFunctions()) { + BinaryFunction &Function = BFI.second; + if (Function.empty()) + continue; + Function.print(OS, "after building cfg"); + } +} + +void JITRewriteInstance::printFunctionContaining(raw_ostream &OS, + uint64_t Address) { + if (BinaryFunction *Func = BC->getBinaryFunctionContainingAddress(Address)) { + OS << formatv("Printing function containg 
address {0:x}\n", Address); + Func->print(OS, "JIT on-demand inspection"); + } +} + +} // namespace bolt +} // namespace llvm diff --git a/bolt/lib/Rewrite/MachORewriteInstance.cpp b/bolt/lib/Rewrite/MachORewriteInstance.cpp index 0970a0507ebe88..172cb640bf911a 100644 --- a/bolt/lib/Rewrite/MachORewriteInstance.cpp +++ b/bolt/lib/Rewrite/MachORewriteInstance.cpp @@ -18,6 +18,7 @@ #include "bolt/Rewrite/BinaryPassManager.h" #include "bolt/Rewrite/ExecutableFileMemoryManager.h" #include "bolt/Rewrite/JITLinkLinker.h" +#include "bolt/Rewrite/RewriteInstance.h" #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" #include "bolt/Utils/Utils.h" #include "llvm/MC/MCObjectStreamer.h" @@ -54,37 +55,6 @@ extern cl::opt<unsigned> Verbosity; namespace llvm { namespace bolt { -extern MCPlusBuilder *createX86MCPlusBuilder(const MCInstrAnalysis *, - const MCInstrInfo *, - const MCRegisterInfo *, - const MCSubtargetInfo *); -extern MCPlusBuilder *createAArch64MCPlusBuilder(const MCInstrAnalysis *, - const MCInstrInfo *, - const MCRegisterInfo *, - const MCSubtargetInfo *); - -namespace { - -MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch, - const MCInstrAnalysis *Analysis, - const MCInstrInfo *Info, - const MCRegisterInfo *RegInfo, - const MCSubtargetInfo *STI) { -#ifdef X86_AVAILABLE - if (Arch == Triple::x86_64) - return createX86MCPlusBuilder(Analysis, Info, RegInfo, STI); -#endif - -#ifdef AARCH64_AVAILABLE - if (Arch == Triple::aarch64) - return createAArch64MCPlusBuilder(Analysis, Info, RegInfo, STI); -#endif - - llvm_unreachable("architecture unsupported by MCPlusBuilder"); -} - -} // anonymous namespace - #define DEBUG_TYPE "bolt" Expected<std::unique_ptr<MachORewriteInstance>> @@ -103,7 +73,8 @@ MachORewriteInstance::MachORewriteInstance(object::MachOObjectFile *InputFile, : InputFile(InputFile), ToolPath(ToolPath) { ErrorAsOutParameter EAO(&Err); auto BCOrErr = BinaryContext::createBinaryContext( - InputFile, /* IsPIC */ true, 
DWARFContext::create(*InputFile), + InputFile->makeTriple(), InputFile->getFileName(), nullptr, + /* IsPIC */ true, DWARFContext::create(*InputFile), {llvm::outs(), llvm::errs()}); if (Error E = BCOrErr.takeError()) { Err = std::move(E); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index db093bfc2d8b78..2fdf7a89d901a7 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -269,6 +269,10 @@ namespace bolt { extern const char *BoltRevision; +// Weird location for createMCPlusBuilder, but this is here to avoid a +// cyclic dependency of libCore (its natural place) and libTarget. libRewrite +// can depend on libTarget, but not libCore. Since libRewrite is the only +// user of this function, we define it here. MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch, const MCInstrAnalysis *Analysis, const MCInstrInfo *Info, @@ -346,8 +350,21 @@ RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc, Stderr.SetUnbuffered(); LLVM_DEBUG(dbgs().SetUnbuffered()); + // Read RISCV subtarget features from input file + std::unique_ptr<SubtargetFeatures> Features; + Triple TheTriple = File->makeTriple(); + if (TheTriple.getArch() == llvm::Triple::riscv64) { + Expected<SubtargetFeatures> FeaturesOrErr = File->getFeatures(); + if (auto E = FeaturesOrErr.takeError()) { + Err = std::move(E); + return; + } else { + Features.reset(new SubtargetFeatures(*FeaturesOrErr)); + } + } + auto BCOrErr = BinaryContext::createBinaryContext( - File, IsPIC, + TheTriple, File->getFileName(), Features.get(), IsPIC, DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore, nullptr, opts::DWPPathName, WithColor::defaultErrorHandler, diff --git a/bolt/unittests/CMakeLists.txt b/bolt/unittests/CMakeLists.txt index 77159e92dec557..de874476d2fc6a 100644 --- a/bolt/unittests/CMakeLists.txt +++ b/bolt/unittests/CMakeLists.txt @@ -7,3 +7,4 @@ endfunction() add_subdirectory(Core) 
add_subdirectory(Profile) +add_subdirectory(Rewrite) diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp index 1fbb07bca966a7..19f0b22486583d 100644 --- a/bolt/unittests/Core/BinaryContext.cpp +++ b/bolt/unittests/Core/BinaryContext.cpp @@ -40,8 +40,8 @@ struct BinaryContextTester : public testing::TestWithParam<Triple::ArchType> { void initializeBOLT() { BC = cantFail(BinaryContext::createBinaryContext( - ObjFile.get(), true, DWARFContext::create(*ObjFile.get()), - {llvm::outs(), llvm::errs()})); + ObjFile->makeTriple(), ObjFile->getFileName(), nullptr, true, + DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()})); ASSERT_FALSE(!BC); } diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp b/bolt/unittests/Core/MCPlusBuilder.cpp index 63448039c53e67..240319aa71b866 100644 --- a/bolt/unittests/Core/MCPlusBuilder.cpp +++ b/bolt/unittests/Core/MCPlusBuilder.cpp @@ -50,8 +50,8 @@ struct MCPlusBuilderTester : public testing::TestWithParam<Triple::ArchType> { void initializeBolt() { BC = cantFail(BinaryContext::createBinaryContext( - ObjFile.get(), true, DWARFContext::create(*ObjFile.get()), - {llvm::outs(), llvm::errs()})); + ObjFile->makeTriple(), ObjFile->getFileName(), nullptr, true, + DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()})); ASSERT_FALSE(!BC); BC->initializeTarget(std::unique_ptr<MCPlusBuilder>( createMCPlusBuilder(GetParam(), BC->MIA.get(), BC->MII.get(), diff --git a/bolt/unittests/Rewrite/CMakeLists.txt b/bolt/unittests/Rewrite/CMakeLists.txt new file mode 100644 index 00000000000000..89da98fb5f4cdd --- /dev/null +++ b/bolt/unittests/Rewrite/CMakeLists.txt @@ -0,0 +1,27 @@ +set(LLVM_LINK_COMPONENTS + DebugInfoDWARF + Object + MC + ${LLVM_TARGETS_TO_BUILD} + ) + +add_bolt_unittest(BOLTRewriteTests + JITRewriteInstance.cpp + + DISABLE_LLVM_LINK_LLVM_DYLIB + ) + +target_link_libraries(BOLTRewriteTests + PRIVATE + LLVMBOLTCore + LLVMBOLTRewrite + ) + +foreach (tgt 
${BOLT_TARGETS_TO_BUILD}) + include_directories( + ${LLVM_MAIN_SRC_DIR}/lib/Target/${tgt} + ${LLVM_BINARY_DIR}/lib/Target/${tgt} + ) + string(TOUPPER "${tgt}" upper) + target_compile_definitions(BOLTRewriteTests PRIVATE "${upper}_AVAILABLE") +endforeach() diff --git a/bolt/unittests/Rewrite/JITRewriteInstance.cpp b/bolt/unittests/Rewrite/JITRewriteInstance.cpp new file mode 100644 index 00000000000000..185b770769c3c9 --- /dev/null +++ b/bolt/unittests/Rewrite/JITRewriteInstance.cpp @@ -0,0 +1,99 @@ +#include "bolt/Rewrite/JITRewriteInstance.h" +#include "bolt/Core/BinaryContext.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/Support/TargetSelect.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::ELF; +using namespace bolt; + +namespace { +struct JITRewriteInstanceTester + : public testing::TestWithParam<Triple::ArchType> { + void SetUp() override { + initalizeLLVM(); + initializeBOLT(); + } + +protected: + void initalizeLLVM() { + llvm::InitializeAllTargetInfos(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllAsmParsers(); + llvm::InitializeAllDisassemblers(); + llvm::InitializeAllTargets(); + llvm::InitializeAllAsmPrinters(); + } + + void initializeBOLT() { + BOLTJIT = cantFail(bolt::JITRewriteInstance::createJITRewriteInstance( + {llvm::outs(), llvm::errs()}, /*IsPIC*/ false)); + ASSERT_FALSE(!BOLTJIT); + } + + std::unique_ptr<JITRewriteInstance> BOLTJIT; +}; +} // namespace + +#ifdef X86_AVAILABLE + +// clang-format off +extern "C" __attribute((naked)) int fib(int n) +{ + __asm__ __volatile__( + "pushq %%r14\n" + "pushq %%rbx\n" + "pushq %%rax\n" + "movl %%edi, %%r14d\n" + "xorl %%ebx, %%ebx\n" + "cmpl $0x2, %%edi\n" + "jge .Ltmp0\n" + "movl %%r14d, %%ecx\n" + "jmp .Ltmp1\n" + ".Ltmp0:\n" + "xorl %%ebx, %%ebx\n" + "nopw %%cs:(%%rax,%%rax)\n" + ".Ltmp2:\n" + "leal -0x1(%%r14), %%edi\n" + "callq fib\n" + "leal -0x2(%%r14), %%ecx\n" + "addl %%eax, %%ebx\n" + "cmpl $0x3, %%r14d\n" + "movl 
%%ecx, %%r14d\n" + "ja .Ltmp2\n" + ".Ltmp1:\n" + "addl %%ecx, %%ebx\n" + "movl %%ebx, %%eax\n" + "addq $0x8, %%rsp\n" + "popq %%rbx\n" + "popq %%r14\n" + "retq\n" + :::); +} +// clang-format on + +INSTANTIATE_TEST_SUITE_P(X86, JITRewriteInstanceTester, + ::testing::Values(Triple::x86_64)); + +TEST_P(JITRewriteInstanceTester, DisassembleFib) { + EXPECT_EQ(fib(7), 13); + + // BOLT JIT test/example + // Analyze fib function in this binary + // Disassemble 63 bytes + uint64_t Address = reinterpret_cast<uint64_t>(&fib); + StringRef Data = StringRef(reinterpret_cast<const char *>(&fib), 63); + + BOLTJIT->registerJITSection(StringRef(".text.example"), Address, Data, 1, + ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_EXECINSTR); + BOLTJIT->registerJITFunction(StringRef("fib"), Address, 63); + ASSERT_FALSE(BOLTJIT->run()); + + // Print to screen + BOLTJIT->printAll(outs()); +} + +#endif _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits