================ @@ -1,171 +1,425 @@ -//===-- InlineAsmPrepare - Prepare inline asm for code gen ----------------===// +//===-- InlineAsmPrepare - Prepare inline asm for code generation ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // -// This pass lowers callbrs in LLVM IR in order to to assist SelectionDAG's -// codegen. +// This pass lowers callbrs and inline asm in LLVM IR in order to assist +// SelectionDAG's codegen. // -// In particular, this pass assists in inserting register copies for the output -// values of a callbr along the edges leading to the indirect target blocks. -// Though the output SSA value is defined by the callbr instruction itself in -// the IR representation, the value cannot be copied to the appropriate virtual -// registers prior to jumping to an indirect label, since the jump occurs -// within the user-provided assembly blob. +// CallBrInst: // -// Instead, those copies must occur separately at the beginning of each -// indirect target. That requires that we create a separate SSA definition in -// each of them (via llvm.callbr.landingpad), and may require splitting -// critical edges so we have a location to place the intrinsic. Finally, we -// remap users of the original callbr output SSA value to instead point to the -// appropriate llvm.callbr.landingpad value. +// - Assists in inserting register copies for the output values of a callbr +// along the edges leading to the indirect target blocks. Though the output +// SSA value is defined by the callbr instruction itself in the IR +// representation, the value cannot be copied to the appropriate virtual +// registers prior to jumping to an indirect label, since the jump occurs +// within the user-provided assembly blob. // -// Ideally, this could be done inside SelectionDAG, or in the -// MachineInstruction representation, without the use of an IR-level intrinsic. -// But, within the current framework, it’s simpler to implement as an IR pass. -// (If support for callbr in GlobalISel is implemented, it’s worth considering -// whether this is still required.) +// Instead, those copies must occur separately at the beginning of each +// indirect target. That requires that we create a separate SSA definition +// in each of them (via llvm.callbr.landingpad), and may require splitting +// critical edges so we have a location to place the intrinsic. Finally, we +// remap users of the original callbr output SSA value to instead point to +// the appropriate llvm.callbr.landingpad value. +// +// Ideally, this could be done inside SelectionDAG, or in the +// MachineInstruction representation, without the use of an IR-level +// intrinsic. But, within the current framework, it’s simpler to implement +// as an IR pass. (If support for callbr in GlobalISel is implemented, +// it’s worth considering whether this is still required.) +// +// InlineAsm: +// +// - Prepares inline assembly for code generation with the fast register +// allocator. In particular, it defaults "rm" (register-or-memory) to +// prefer the "m" constraints (the front-end opts for the "r" constraint), +// simplifying register allocation by forcing operands to memory locations. +// The other register allocators are equipped to handle folding registers +// already, so don't need to change the default. // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/InlineAsmPrepare.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/iterator.h" #include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/BasicBlock.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; #define DEBUG_TYPE "inline-asm-prepare" -static bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT); -static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, - DominatorTree &DT); -static void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic, - SSAUpdater &SSAUpdate); -static SmallVector<CallBrInst *, 2> FindCallBrs(Function &F); - namespace { class InlineAsmPrepare : public FunctionPass { public: InlineAsmPrepare() : FunctionPass(ID) {} - void getAnalysisUsage(AnalysisUsage &AU) const override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + } bool runOnFunction(Function &F) override; + static char ID; }; -} // end anonymous namespace +char InlineAsmPrepare::ID = 0; -PreservedAnalyses InlineAsmPreparePass::run(Function &F, - FunctionAnalysisManager &FAM) { - bool Changed = false; - SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(F); +} // end anonymous namespace - if (CBRs.empty()) - return PreservedAnalyses::all(); +INITIALIZE_PASS_BEGIN(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts", + false, false) - auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); +FunctionPass *llvm::createInlineAsmPreparePass() { + return new InlineAsmPrepare(); +} - Changed |= SplitCriticalEdges(CBRs, DT); - Changed |= InsertIntrinsicCalls(CBRs, DT); +//===----------------------------------------------------------------------===// +// Process InlineAsm instructions +//===----------------------------------------------------------------------===// - if (!Changed) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserve<DominatorTreeAnalysis>(); - return PA; +/// The inline asm constraint allows both register and memory. +static bool IsRegMemConstraint(StringRef Constraint) { + return Constraint.size() == 2 && (Constraint == "rm" || Constraint == "mr"); } -char InlineAsmPrepare::ID = 0; -INITIALIZE_PASS_BEGIN(InlineAsmPrepare, "inline-asm-prepare", - "Prepare inline asm insts", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(InlineAsmPrepare, "inline-asm-prepare", - "Prepare inline asm insts", false, false) +/// Tag "rm" output constraints with '*' to signify that they default to a +/// memory location. +static std::pair<std::string, bool> +ConvertConstraintsToMemory(StringRef ConstraintStr) { + auto I = ConstraintStr.begin(), E = ConstraintStr.end(); + std::string Out; + raw_string_ostream O(Out); + bool HasRegMem = false; + + while (I != E) { + bool IsOutput = false; + bool HasIndirect = false; + if (*I == '=') { + O << *I; + IsOutput = true; + ++I; + if (I == E) + return {}; + } + if (*I == '*') { + O << '*'; + HasIndirect = true; + ++I; + if (I == E) + return {}; + } + if (*I == '+') { + O << '+'; + IsOutput = true; + ++I; + if (I == E) + return {}; + } -FunctionPass *llvm::createInlineAsmPreparePass() { - return new InlineAsmPrepare(); + auto Comma = std::find(I, E, ','); + std::string Sub(I, Comma); + if (IsRegMemConstraint(Sub)) { + HasRegMem = true; + if (IsOutput && !HasIndirect) + O << '*'; + } + + O << Sub; + + if (Comma == E) + break; + + O << ','; + I = Comma + 1; + } + + return {Out, HasRegMem}; } -void InlineAsmPrepare::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<DominatorTreeWrapperPass>(); +/// Build a map of tied constraints. TiedOutput[i] = j means Constraint i is an +/// input tied to output constraint j. +static void +BuildTiedConstraintMap(const InlineAsm::ConstraintInfoVector &Constraints, + SmallVectorImpl<int> &TiedOutput) { + for (unsigned I = 0, E = Constraints.size(); I != E; ++I) { + const InlineAsm::ConstraintInfo &C = Constraints[I]; + if (C.Type == InlineAsm::isOutput && C.hasMatchingInput()) { + int InputIdx = C.MatchingInput; + if (InputIdx >= 0 && InputIdx < (int)Constraints.size()) + TiedOutput[InputIdx] = I; + } + + if (C.Type == InlineAsm::isInput && C.hasMatchingInput()) { + int OutputIdx = C.MatchingInput; + if (OutputIdx >= 0 && OutputIdx < (int)Constraints.size()) + TiedOutput[I] = OutputIdx; + } + } } -SmallVector<CallBrInst *, 2> FindCallBrs(Function &F) { - SmallVector<CallBrInst *, 2> CBRs; - for (BasicBlock &BB : F) - if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator())) - if (!CBR->getType()->isVoidTy() && !CBR->use_empty()) - CBRs.push_back(CBR); - return CBRs; +/// Process an output constraint, creating allocas for converted constraints. +static void ProcessOutputConstraint( + const InlineAsm::ConstraintInfo &C, Type *RetTy, unsigned OutputIdx, + IRBuilder<> &EntryBuilder, SmallVectorImpl<Value *> &NewArgs, + SmallVectorImpl<Type *> &NewArgTypes, SmallVectorImpl<Type *> &NewRetTypes, + SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs, + SmallVectorImpl<AllocaInst *> &OutputAllocas, unsigned ConstraintIdx) { + Type *SlotTy = RetTy; + if (StructType *ST = dyn_cast<StructType>(RetTy)) + SlotTy = ST->getElementType(OutputIdx); + + if (C.hasRegMemConstraints()) { + // Converted to memory constraint. Create alloca and pass pointer as + // argument. + AllocaInst *Slot = EntryBuilder.CreateAlloca(SlotTy, nullptr, "asm_mem"); + NewArgs.push_back(Slot); + NewArgTypes.push_back(Slot->getType()); + ElementTypeAttrs.push_back({NewArgs.size() - 1, SlotTy}); + OutputAllocas[ConstraintIdx] = Slot; + // No return value for this output since it's now an out-parameter. + } else { + // Unchanged, still an output return value. + NewRetTypes.push_back(SlotTy); + } } -bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) { - bool Changed = false; - CriticalEdgeSplittingOptions Options(&DT); - Options.setMergeIdenticalEdges(); +/// Process an input constraint, handling tied constraints and conversions. +static void ProcessInputConstraint(const InlineAsm::ConstraintInfo &C, + Value *ArgVal, ArrayRef<int> TiedOutput, + ArrayRef<AllocaInst *> OutputAllocas, + unsigned ConstraintIdx, IRBuilder<> &Builder, + IRBuilder<> &EntryBuilder, + SmallVectorImpl<Value *> &NewArgs, + SmallVectorImpl<Type *> &NewArgTypes) { + Type *ArgTy = ArgVal->getType(); + + if (TiedOutput[ConstraintIdx] != -1) { + int MatchIdx = TiedOutput[ConstraintIdx]; + if (AllocaInst *Slot = OutputAllocas[MatchIdx]) { + // The matched output was converted to memory. Store this input into the + // alloca. + Builder.CreateStore(ArgVal, Slot); + + // Pass the alloca pointer as the argument, instead of ArgVal. This + // ensures the tied "0" constraint matches the "*m" output. + NewArgs.push_back(Slot); + NewArgTypes.push_back(Slot->getType()); + return; + } + } - // The indirect destination might be duplicated between another parameter... - // %0 = callbr ... [label %x, label %x] - // ...hence MergeIdenticalEdges and AllowIndentical edges, but we don't need - // to split the default destination if it's duplicated between an indirect - // destination... - // %1 = callbr ... to label %x [label %x] - // ...hence starting at 1 and checking against successor 0 (aka the default - // destination). - for (CallBrInst *CBR : CBRs) - for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i) - if (CBR->getSuccessor(i) == CBR->getSuccessor(0) || - isCriticalEdge(CBR, i, /*AllowIdenticalEdges*/ true)) - if (SplitKnownCriticalEdge(CBR, i, Options)) - Changed = true; - return Changed; + if (C.hasRegMemConstraints()) { + // Converted to memory constraint. Create alloca, store input, pass pointer + // as argument. + AllocaInst *Slot = EntryBuilder.CreateAlloca(ArgTy, nullptr, "asm_mem"); + Builder.CreateStore(ArgVal, Slot); + NewArgs.push_back(Slot); + NewArgTypes.push_back(Slot->getType()); + } else { + // Unchanged + NewArgs.push_back(ArgVal); + NewArgTypes.push_back(ArgTy); + } } -bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) { - bool Changed = false; - SmallPtrSet<const BasicBlock *, 4> Visited; - IRBuilder<> Builder(CBRs[0]->getContext()); - for (CallBrInst *CBR : CBRs) { - if (!CBR->getNumIndirectDests()) - continue; +/// Build the return type from the collected return types. +static Type *BuildReturnType(ArrayRef<Type *> NewRetTypes, + LLVMContext &Context) { + if (NewRetTypes.empty()) + return Type::getVoidTy(Context); - SSAUpdater SSAUpdate; - SSAUpdate.Initialize(CBR->getType(), CBR->getName()); - SSAUpdate.AddAvailableValue(CBR->getParent(), CBR); - SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR); + if (NewRetTypes.size() == 1) + return NewRetTypes[0]; + + return StructType::get(Context, NewRetTypes); +} - for (BasicBlock *IndDest : CBR->getIndirectDests()) { - if (!Visited.insert(IndDest).second) +/// Create the new inline assembly call with converted constraints. +static CallInst *CreateNewInlineAsm( + InlineAsm *IA, const std::string &NewConstraintStr, Type *NewRetTy, + const SmallVectorImpl<Type *> &NewArgTypes, + const SmallVectorImpl<Value *> &NewArgs, + const SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs, + CallBase *CB, IRBuilder<> &Builder, LLVMContext &Context) { + FunctionType *NewFTy = FunctionType::get(NewRetTy, NewArgTypes, false); + InlineAsm *NewIA = InlineAsm::get( + NewFTy, IA->getAsmString(), NewConstraintStr, IA->hasSideEffects(), + IA->isAlignStack(), IA->getDialect(), IA->canThrow()); + + CallInst *NewCall = Builder.CreateCall(NewFTy, NewIA, NewArgs); + NewCall->setCallingConv(CB->getCallingConv()); + NewCall->setAttributes(CB->getAttributes()); + NewCall->setDebugLoc(CB->getDebugLoc()); + + for (const auto &[Index, Ty] : ElementTypeAttrs) + NewCall->addParamAttr(Index, + Attribute::get(Context, Attribute::ElementType, Ty)); + + return NewCall; +} + +/// Reconstruct the return value from the new call and allocas. +static Value * +ReconstructReturnValue(Type *RetTy, CallInst *NewCall, + const InlineAsm::ConstraintInfoVector &Constraints, + const SmallVectorImpl<AllocaInst *> &OutputAllocas, + const SmallVectorImpl<Type *> &NewRetTypes, + IRBuilder<> &Builder) { + if (RetTy->isVoidTy()) + return nullptr; + + if (isa<StructType>(RetTy)) { + // Multiple outputs. Reconstruct the struct. + Value *Res = PoisonValue::get(RetTy); + unsigned NewRetIdx = 0; + unsigned OriginalOutIdx = 0; + + for (unsigned I = 0, E = Constraints.size(); I != E; ++I) { + if (Constraints[I].Type != InlineAsm::isOutput) continue; - Builder.SetInsertPoint(&*IndDest->begin()); - CallInst *Intrinsic = Builder.CreateIntrinsic( - CBR->getType(), Intrinsic::callbr_landingpad, {CBR}); - SSAUpdate.AddAvailableValue(IndDest, Intrinsic); - UpdateSSA(DT, CBR, Intrinsic, SSAUpdate); - Changed = true; + + Value *Val = nullptr; + if (AllocaInst *Slot = OutputAllocas[I]) { + // Converted to memory. Load from alloca. + Val = Builder.CreateLoad(Slot->getAllocatedType(), Slot); ---------------- bwendling wrote:
Done. https://github.com/llvm/llvm-project/pull/181973 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
