================ @@ -1,171 +1,425 @@ -//===-- InlineAsmPrepare - Prepare inline asm for code gen ----------------===// +//===-- InlineAsmPrepare - Prepare inline asm for code generation ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // -// This pass lowers callbrs in LLVM IR in order to to assist SelectionDAG's -// codegen. +// This pass lowers callbrs and inline asm in LLVM IR in order to assist +// SelectionDAG's codegen. // -// In particular, this pass assists in inserting register copies for the output -// values of a callbr along the edges leading to the indirect target blocks. -// Though the output SSA value is defined by the callbr instruction itself in -// the IR representation, the value cannot be copied to the appropriate virtual -// registers prior to jumping to an indirect label, since the jump occurs -// within the user-provided assembly blob. +// CallBrInst: // -// Instead, those copies must occur separately at the beginning of each -// indirect target. That requires that we create a separate SSA definition in -// each of them (via llvm.callbr.landingpad), and may require splitting -// critical edges so we have a location to place the intrinsic. Finally, we -// remap users of the original callbr output SSA value to instead point to the -// appropriate llvm.callbr.landingpad value. +// - Assists in inserting register copies for the output values of a callbr +// along the edges leading to the indirect target blocks. Though the output +// SSA value is defined by the callbr instruction itself in the IR +// representation, the value cannot be copied to the appropriate virtual +// registers prior to jumping to an indirect label, since the jump occurs +// within the user-provided assembly blob. 
// -// Ideally, this could be done inside SelectionDAG, or in the -// MachineInstruction representation, without the use of an IR-level intrinsic. -// But, within the current framework, it’s simpler to implement as an IR pass. -// (If support for callbr in GlobalISel is implemented, it’s worth considering -// whether this is still required.) +// Instead, those copies must occur separately at the beginning of each +// indirect target. That requires that we create a separate SSA definition +// in each of them (via llvm.callbr.landingpad), and may require splitting +// critical edges so we have a location to place the intrinsic. Finally, we +// remap users of the original callbr output SSA value to instead point to +// the appropriate llvm.callbr.landingpad value. +// +// Ideally, this could be done inside SelectionDAG, or in the +// MachineInstruction representation, without the use of an IR-level +// intrinsic. But, within the current framework, it’s simpler to implement +// as an IR pass. (If support for callbr in GlobalISel is implemented, +// it’s worth considering whether this is still required.) +// +// InlineAsm: +// +// - Prepares inline assembly for code generation with the fast register +// allocator. In particular, it defaults "rm" (register-or-memory) to +// prefer the "m" constraints (the front-end opts for the "r" constraint), +// simplifying register allocation by forcing operands to memory locations. +// The other register allocators are equipped to handle folding registers +// already, so don't need to change the default. 
// //===----------------------------------------------------------------------===// #include "llvm/CodeGen/InlineAsmPrepare.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/iterator.h" #include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/BasicBlock.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; #define DEBUG_TYPE "inline-asm-prepare" -static bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT); -static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, - DominatorTree &DT); -static void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic, - SSAUpdater &SSAUpdate); -static SmallVector<CallBrInst *, 2> FindCallBrs(Function &F); - namespace { class InlineAsmPrepare : public FunctionPass { public: InlineAsmPrepare() : FunctionPass(ID) {} - void getAnalysisUsage(AnalysisUsage &AU) const override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + } bool runOnFunction(Function &F) override; + static char ID; }; -} // end anonymous namespace +char InlineAsmPrepare::ID = 0; -PreservedAnalyses InlineAsmPreparePass::run(Function &F, - FunctionAnalysisManager &FAM) { - bool Changed = false; - SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(F); +} // end anonymous namespace - if (CBRs.empty()) - return PreservedAnalyses::all(); 
+INITIALIZE_PASS_BEGIN(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts", + false, false) - auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); +FunctionPass *llvm::createInlineAsmPreparePass() { + return new InlineAsmPrepare(); +} - Changed |= SplitCriticalEdges(CBRs, DT); - Changed |= InsertIntrinsicCalls(CBRs, DT); +//===----------------------------------------------------------------------===// +// Process InlineAsm instructions +//===----------------------------------------------------------------------===// - if (!Changed) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserve<DominatorTreeAnalysis>(); - return PA; +/// The inline asm constraint allows both register and memory. +static bool IsRegMemConstraint(StringRef Constraint) { + return Constraint.size() == 2 && (Constraint == "rm" || Constraint == "mr"); } -char InlineAsmPrepare::ID = 0; -INITIALIZE_PASS_BEGIN(InlineAsmPrepare, "inline-asm-prepare", - "Prepare inline asm insts", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(InlineAsmPrepare, "inline-asm-prepare", - "Prepare inline asm insts", false, false) +/// Tag "rm" output constraints with '*' to signify that they default to a +/// memory location. 
+static std::pair<std::string, bool> +ConvertConstraintsToMemory(StringRef ConstraintStr) { + auto I = ConstraintStr.begin(), E = ConstraintStr.end(); + std::string Out; + raw_string_ostream O(Out); + bool HasRegMem = false; + + while (I != E) { + bool IsOutput = false; + bool HasIndirect = false; + if (*I == '=') { + O << *I; + IsOutput = true; + ++I; + if (I == E) + return {}; + } + if (*I == '*') { + O << '*'; + HasIndirect = true; + ++I; + if (I == E) + return {}; + } + if (*I == '+') { + O << '+'; + IsOutput = true; + ++I; + if (I == E) + return {}; + } -FunctionPass *llvm::createInlineAsmPreparePass() { - return new InlineAsmPrepare(); + auto Comma = std::find(I, E, ','); + std::string Sub(I, Comma); + if (IsRegMemConstraint(Sub)) { + HasRegMem = true; + if (IsOutput && !HasIndirect) + O << '*'; + } + + O << Sub; + + if (Comma == E) + break; + + O << ','; + I = Comma + 1; + } + + return {Out, HasRegMem}; } -void InlineAsmPrepare::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<DominatorTreeWrapperPass>(); +/// Build a map of tied constraints. TiedOutput[i] = j means Constraint i is an +/// input tied to output constraint j. 
+static void +BuildTiedConstraintMap(const InlineAsm::ConstraintInfoVector &Constraints, + SmallVectorImpl<int> &TiedOutput) { + for (unsigned I = 0, E = Constraints.size(); I != E; ++I) { + const InlineAsm::ConstraintInfo &C = Constraints[I]; + if (C.Type == InlineAsm::isOutput && C.hasMatchingInput()) { + int InputIdx = C.MatchingInput; + if (InputIdx >= 0 && InputIdx < (int)Constraints.size()) + TiedOutput[InputIdx] = I; + } + + if (C.Type == InlineAsm::isInput && C.hasMatchingInput()) { + int OutputIdx = C.MatchingInput; + if (OutputIdx >= 0 && OutputIdx < (int)Constraints.size()) + TiedOutput[I] = OutputIdx; + } + } } -SmallVector<CallBrInst *, 2> FindCallBrs(Function &F) { - SmallVector<CallBrInst *, 2> CBRs; - for (BasicBlock &BB : F) - if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator())) - if (!CBR->getType()->isVoidTy() && !CBR->use_empty()) - CBRs.push_back(CBR); - return CBRs; +/// Process an output constraint, creating allocas for converted constraints. +static void ProcessOutputConstraint( + const InlineAsm::ConstraintInfo &C, Type *RetTy, unsigned OutputIdx, + IRBuilder<> &EntryBuilder, SmallVectorImpl<Value *> &NewArgs, + SmallVectorImpl<Type *> &NewArgTypes, SmallVectorImpl<Type *> &NewRetTypes, + SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs, + SmallVectorImpl<AllocaInst *> &OutputAllocas, unsigned ConstraintIdx) { + Type *SlotTy = RetTy; + if (StructType *ST = dyn_cast<StructType>(RetTy)) + SlotTy = ST->getElementType(OutputIdx); + + if (C.hasRegMemConstraints()) { + // Converted to memory constraint. Create alloca and pass pointer as + // argument. + AllocaInst *Slot = EntryBuilder.CreateAlloca(SlotTy, nullptr, "asm_mem"); + NewArgs.push_back(Slot); + NewArgTypes.push_back(Slot->getType()); + ElementTypeAttrs.push_back({NewArgs.size() - 1, SlotTy}); + OutputAllocas[ConstraintIdx] = Slot; + // No return value for this output since it's now an out-parameter. + } else { + // Unchanged, still an output return value. 
+ NewRetTypes.push_back(SlotTy); + } } -bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) { - bool Changed = false; - CriticalEdgeSplittingOptions Options(&DT); - Options.setMergeIdenticalEdges(); +/// Process an input constraint, handling tied constraints and conversions. +static void ProcessInputConstraint(const InlineAsm::ConstraintInfo &C, + Value *ArgVal, ArrayRef<int> TiedOutput, + ArrayRef<AllocaInst *> OutputAllocas, + unsigned ConstraintIdx, IRBuilder<> &Builder, + IRBuilder<> &EntryBuilder, + SmallVectorImpl<Value *> &NewArgs, + SmallVectorImpl<Type *> &NewArgTypes) { + Type *ArgTy = ArgVal->getType(); + + if (TiedOutput[ConstraintIdx] != -1) { + int MatchIdx = TiedOutput[ConstraintIdx]; + if (AllocaInst *Slot = OutputAllocas[MatchIdx]) { + // The matched output was converted to memory. Store this input into the + // alloca. + Builder.CreateStore(ArgVal, Slot); + + // Pass the alloca pointer as the argument, instead of ArgVal. This + // ensures the tied "0" constraint matches the "*m" output. + NewArgs.push_back(Slot); + NewArgTypes.push_back(Slot->getType()); + return; + } + } - // The indirect destination might be duplicated between another parameter... - // %0 = callbr ... [label %x, label %x] - // ...hence MergeIdenticalEdges and AllowIndentical edges, but we don't need - // to split the default destination if it's duplicated between an indirect - // destination... - // %1 = callbr ... to label %x [label %x] - // ...hence starting at 1 and checking against successor 0 (aka the default - // destination). - for (CallBrInst *CBR : CBRs) - for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i) - if (CBR->getSuccessor(i) == CBR->getSuccessor(0) || - isCriticalEdge(CBR, i, /*AllowIdenticalEdges*/ true)) - if (SplitKnownCriticalEdge(CBR, i, Options)) - Changed = true; - return Changed; + if (C.hasRegMemConstraints()) { + // Converted to memory constraint. Create alloca, store input, pass pointer + // as argument. 
+ AllocaInst *Slot = EntryBuilder.CreateAlloca(ArgTy, nullptr, "asm_mem"); + Builder.CreateStore(ArgVal, Slot); + NewArgs.push_back(Slot); + NewArgTypes.push_back(Slot->getType()); + } else { + // Unchanged + NewArgs.push_back(ArgVal); + NewArgTypes.push_back(ArgTy); + } } -bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) { - bool Changed = false; - SmallPtrSet<const BasicBlock *, 4> Visited; - IRBuilder<> Builder(CBRs[0]->getContext()); - for (CallBrInst *CBR : CBRs) { - if (!CBR->getNumIndirectDests()) - continue; +/// Build the return type from the collected return types. +static Type *BuildReturnType(ArrayRef<Type *> NewRetTypes, + LLVMContext &Context) { + if (NewRetTypes.empty()) + return Type::getVoidTy(Context); - SSAUpdater SSAUpdate; - SSAUpdate.Initialize(CBR->getType(), CBR->getName()); - SSAUpdate.AddAvailableValue(CBR->getParent(), CBR); - SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR); + if (NewRetTypes.size() == 1) + return NewRetTypes[0]; + + return StructType::get(Context, NewRetTypes); +} - for (BasicBlock *IndDest : CBR->getIndirectDests()) { - if (!Visited.insert(IndDest).second) +/// Create the new inline assembly call with converted constraints. 
+static CallInst *CreateNewInlineAsm( + InlineAsm *IA, const std::string &NewConstraintStr, Type *NewRetTy, + const SmallVectorImpl<Type *> &NewArgTypes, + const SmallVectorImpl<Value *> &NewArgs, + const SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs, + CallBase *CB, IRBuilder<> &Builder, LLVMContext &Context) { + FunctionType *NewFTy = FunctionType::get(NewRetTy, NewArgTypes, false); + InlineAsm *NewIA = InlineAsm::get( + NewFTy, IA->getAsmString(), NewConstraintStr, IA->hasSideEffects(), + IA->isAlignStack(), IA->getDialect(), IA->canThrow()); + + CallInst *NewCall = Builder.CreateCall(NewFTy, NewIA, NewArgs); + NewCall->setCallingConv(CB->getCallingConv()); + NewCall->setAttributes(CB->getAttributes()); + NewCall->setDebugLoc(CB->getDebugLoc()); + + for (const auto &[Index, Ty] : ElementTypeAttrs) + NewCall->addParamAttr(Index, + Attribute::get(Context, Attribute::ElementType, Ty)); + + return NewCall; +} + +/// Reconstruct the return value from the new call and allocas. +static Value * +ReconstructReturnValue(Type *RetTy, CallInst *NewCall, + const InlineAsm::ConstraintInfoVector &Constraints, + const SmallVectorImpl<AllocaInst *> &OutputAllocas, + const SmallVectorImpl<Type *> &NewRetTypes, + IRBuilder<> &Builder) { + if (RetTy->isVoidTy()) + return nullptr; + + if (isa<StructType>(RetTy)) { + // Multiple outputs. Reconstruct the struct. + Value *Res = PoisonValue::get(RetTy); + unsigned NewRetIdx = 0; + unsigned OriginalOutIdx = 0; + + for (unsigned I = 0, E = Constraints.size(); I != E; ++I) { + if (Constraints[I].Type != InlineAsm::isOutput) continue; - Builder.SetInsertPoint(&*IndDest->begin()); - CallInst *Intrinsic = Builder.CreateIntrinsic( - CBR->getType(), Intrinsic::callbr_landingpad, {CBR}); - SSAUpdate.AddAvailableValue(IndDest, Intrinsic); - UpdateSSA(DT, CBR, Intrinsic, SSAUpdate); - Changed = true; + + Value *Val = nullptr; + if (AllocaInst *Slot = OutputAllocas[I]) { + // Converted to memory. Load from alloca. 
+ Val = Builder.CreateLoad(Slot->getAllocatedType(), Slot); + } else { + // Not converted. Extract from NewCall return. + if (NewRetTypes.size() == 1) { + Val = NewCall; + } else { + Val = Builder.CreateExtractValue(NewCall, NewRetIdx); + } + NewRetIdx++; + } + + Res = Builder.CreateInsertValue(Res, Val, OriginalOutIdx++); + } + + return Res; + } + + // Single output. + // Find the output constraint (should be the first one). + unsigned OutConstraintIdx = 0; + for (unsigned I = 0; I < Constraints.size(); ++I) { + if (Constraints[I].Type == InlineAsm::isOutput) { + OutConstraintIdx = I; + break; } } - return Changed; + + if (AllocaInst *Slot = OutputAllocas[OutConstraintIdx]) + return Builder.CreateLoad(Slot->getAllocatedType(), Slot); + + return NewCall; +} + +static bool ProcessInlineAsm(Function &F, CallBase *CB) { + InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand()); + const InlineAsm::ConstraintInfoVector &Constraints = IA->ParseConstraints(); + + const auto &[NewConstraintStr, HasRegMem] = + ConvertConstraintsToMemory(IA->getConstraintString()); + if (!HasRegMem) + return false; + + IRBuilder<> Builder(CB); + IRBuilder<> EntryBuilder(&F.getEntryBlock(), F.getEntryBlock().begin()); + + // Collect new arguments and return types. + SmallVector<Value *, 8> NewArgs; + SmallVector<Type *, 8> NewArgTypes; ---------------- nikic wrote:
Why do we need this NewArgTypes array? Isn't this always the same as the types of the values in NewArgs? Can you just create it from that array in CreateNewInlineAsm to avoid having to manage it everywhere else? https://github.com/llvm/llvm-project/pull/181973 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
