================ @@ -1,204 +1,417 @@ -//===-- InlineAsmPrepare - Prepare inline asm for code gen ----------------===// +//===-- InlineAsmPrepare - Prepare inline asm for code generation ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // -// This pass lowers callbrs in LLVM IR in order to to assist SelectionDAG's -// codegen. +// This pass lowers callbrs and inline asm in LLVM IR in order to assist +// SelectionDAG's codegen. // -// In particular, this pass assists in inserting register copies for the output -// values of a callbr along the edges leading to the indirect target blocks. -// Though the output SSA value is defined by the callbr instruction itself in -// the IR representation, the value cannot be copied to the appropriate virtual -// registers prior to jumping to an indirect label, since the jump occurs -// within the user-provided assembly blob. +// CallBrInst: // -// Instead, those copies must occur separately at the beginning of each -// indirect target. That requires that we create a separate SSA definition in -// each of them (via llvm.callbr.landingpad), and may require splitting -// critical edges so we have a location to place the intrinsic. Finally, we -// remap users of the original callbr output SSA value to instead point to the -// appropriate llvm.callbr.landingpad value. +// - Assists in inserting register copies for the output values of a callbr +// along the edges leading to the indirect target blocks. Though the output +// SSA value is defined by the callbr instruction itself in the IR +// representation, the value cannot be copied to the appropriate virtual +// registers prior to jumping to an indirect label, since the jump occurs +// within the user-provided assembly blob. 
// -// Ideally, this could be done inside SelectionDAG, or in the -// MachineInstruction representation, without the use of an IR-level intrinsic. -// But, within the current framework, it’s simpler to implement as an IR pass. -// (If support for callbr in GlobalISel is implemented, it’s worth considering -// whether this is still required.) +// Instead, those copies must occur separately at the beginning of each +// indirect target. That requires that we create a separate SSA definition +// in each of them (via llvm.callbr.landingpad), and may require splitting +// critical edges so we have a location to place the intrinsic. Finally, we +// remap users of the original callbr output SSA value to instead point to +// the appropriate llvm.callbr.landingpad value. +// +// Ideally, this could be done inside SelectionDAG, or in the +// MachineInstruction representation, without the use of an IR-level +// intrinsic. But, within the current framework, it’s simpler to implement +// as an IR pass. (If support for callbr in GlobalISel is implemented, +// it’s worth considering whether this is still required.) +// +// InlineAsm: +// +// - Prepares inline assembly for code generation with the fast register +// allocator. In particular, it defaults "rm" (register-or-memory) to +// prefer the "m" constraints (the front-end opts for the "r" constraint), +// simplifying register allocation by forcing operands to memory locations. +// The other register allocators are equipped to handle folding registers +// already, so don't need to change the default. 
// //===----------------------------------------------------------------------===// #include "llvm/CodeGen/InlineAsmPrepare.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/iterator.h" #include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/BasicBlock.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; #define DEBUG_TYPE "inline-asm-prepare" -static bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT); -static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, - DominatorTree &DT); -static void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic, - SSAUpdater &SSAUpdate); -static SmallVector<CallBrInst *, 2> FindCallBrs(Function &F); - namespace { class InlineAsmPrepare : public FunctionPass { public: InlineAsmPrepare() : FunctionPass(ID) {} - void getAnalysisUsage(AnalysisUsage &AU) const override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + } bool runOnFunction(Function &F) override; + static char ID; }; -} // end anonymous namespace +char InlineAsmPrepare::ID = 0; -PreservedAnalyses InlineAsmPreparePass::run(Function &F, - FunctionAnalysisManager &FAM) { - bool Changed = false; - SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(F); +} // end anonymous namespace - if (CBRs.empty()) - return PreservedAnalyses::all(); 
+INITIALIZE_PASS_BEGIN(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts", + false, false) - auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); +FunctionPass *llvm::createInlineAsmPreparePass() { + return new InlineAsmPrepare(); +} - Changed |= SplitCriticalEdges(CBRs, DT); - Changed |= InsertIntrinsicCalls(CBRs, DT); +//===----------------------------------------------------------------------===// +// Process InlineAsm instructions +//===----------------------------------------------------------------------===// - if (!Changed) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserve<DominatorTreeAnalysis>(); - return PA; +/// The inline asm constraint allows both register and memory. +static bool isRegMemConstraint(StringRef Constraint) { + return Constraint.size() == 2 && (Constraint == "rm" || Constraint == "mr"); } -char InlineAsmPrepare::ID = 0; -INITIALIZE_PASS_BEGIN(InlineAsmPrepare, "inline-asm-prepare", - "Prepare inline asm insts", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(InlineAsmPrepare, "inline-asm-prepare", - "Prepare inline asm insts", false, false) +/// Tag "rm" output constraints with '*' to signify that they default to a +/// memory location. 
+static std::pair<std::string, MemoryEffects> +convertConstraintsToMemory(StringRef ConstraintStr) { + std::vector<std::string> Constraints; + Constraints.reserve(ConstraintStr.count(',') + 1); + + MemoryEffects NewME = MemoryEffects::none(); + for (StringRef Constraint : llvm::split(ConstraintStr, ',')) { + std::string NewConstraint; + + auto I = Constraint.begin(), E = Constraint.end(); + bool IsOutput = false; + bool HasIndirect = false; + + if (*I == '=') { + ++I; + NewConstraint += '='; + IsOutput = true; + } + if (I == E) + return {std::string(), MemoryEffects::none()}; + if (*I == '*') { + ++I; + NewConstraint += '*'; + HasIndirect = true; + } + if (I == E) + return {std::string(), MemoryEffects::none()}; + if (*I == '+') { + ++I; + NewConstraint += '+'; + } + if (I == E) + return {std::string(), MemoryEffects::none()}; + + std::string RestConstraint(I, E); + if (isRegMemConstraint(RestConstraint)) { + if (IsOutput) + NewME |= MemoryEffects::argMemOnly(ModRefInfo::Mod); ---------------- bwendling wrote:
Yeah. I tried your suggestion to compile Linux with this transformation on (great suggestion!) and it failed pretty quickly. Trying to figure out why, I had the compiler dump out functions with inline assembly in them. None of them had an indirection indicator on inputs. My thought is that if we generate roughly what we did before this change then we should be "okay", and since Linux uses inline asm all over the place it's a good (though obviously not completely comprehensive) testcase. What did fail was how tied operands were handled. Fixing that code and not modifying the inputs made things work. It may omit some edge cases, so I'll look further, but right now it looks like we don't need to change inputs, at least not in every context. https://github.com/llvm/llvm-project/pull/181973 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
