================ @@ -1,204 +1,464 @@ -//===-- InlineAsmPrepare - Prepare inline asm for code gen ----------------===// +//===-- InlineAsmPrepare - Prepare inline asm for code generation ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // -// This pass lowers callbrs in LLVM IR in order to to assist SelectionDAG's -// codegen. +// This pass lowers callbrs and inline asm in LLVM IR in order to assist +// SelectionDAG's codegen. // -// In particular, this pass assists in inserting register copies for the output -// values of a callbr along the edges leading to the indirect target blocks. -// Though the output SSA value is defined by the callbr instruction itself in -// the IR representation, the value cannot be copied to the appropriate virtual -// registers prior to jumping to an indirect label, since the jump occurs -// within the user-provided assembly blob. +// CallBrInst: // -// Instead, those copies must occur separately at the beginning of each -// indirect target. That requires that we create a separate SSA definition in -// each of them (via llvm.callbr.landingpad), and may require splitting -// critical edges so we have a location to place the intrinsic. Finally, we -// remap users of the original callbr output SSA value to instead point to the -// appropriate llvm.callbr.landingpad value. +// - Assists in inserting register copies for the output values of a callbr +// along the edges leading to the indirect target blocks. Though the output +// SSA value is defined by the callbr instruction itself in the IR +// representation, the value cannot be copied to the appropriate virtual +// registers prior to jumping to an indirect label, since the jump occurs +// within the user-provided assembly blob. 
// -// Ideally, this could be done inside SelectionDAG, or in the -// MachineInstruction representation, without the use of an IR-level intrinsic. -// But, within the current framework, it’s simpler to implement as an IR pass. -// (If support for callbr in GlobalISel is implemented, it’s worth considering -// whether this is still required.) +// Instead, those copies must occur separately at the beginning of each +// indirect target. That requires that we create a separate SSA definition +// in each of them (via llvm.callbr.landingpad), and may require splitting +// critical edges so we have a location to place the intrinsic. Finally, we +// remap users of the original callbr output SSA value to instead point to +// the appropriate llvm.callbr.landingpad value. +// +// Ideally, this could be done inside SelectionDAG, or in the +// MachineInstruction representation, without the use of an IR-level +// intrinsic. But, within the current framework, it’s simpler to implement +// as an IR pass. (If support for callbr in GlobalISel is implemented, +// it’s worth considering whether this is still required.) +// +// InlineAsm: +// +// - Prepares inline assembly for code generation with the fast register +// allocator. In particular, it defaults "rm" (register-or-memory) to +// prefer the "m" constraints (the front-end opts for the "r" constraint), +// simplifying register allocation by forcing operands to memory locations. +// The other register allocators are equipped to handle folding registers +// already, so don't need to change the default. 
// //===----------------------------------------------------------------------===// #include "llvm/CodeGen/InlineAsmPrepare.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/iterator.h" #include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/BasicBlock.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include <sstream> using namespace llvm; #define DEBUG_TYPE "inline-asm-prepare" -static bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT); -static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, - DominatorTree &DT); -static void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic, - SSAUpdater &SSAUpdate); -static SmallVector<CallBrInst *, 2> FindCallBrs(Function &F); - namespace { class InlineAsmPrepare : public FunctionPass { public: InlineAsmPrepare() : FunctionPass(ID) {} - void getAnalysisUsage(AnalysisUsage &AU) const override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + } bool runOnFunction(Function &F) override; + static char ID; }; -} // end anonymous namespace +char InlineAsmPrepare::ID = 0; -PreservedAnalyses InlineAsmPreparePass::run(Function &F, - FunctionAnalysisManager &FAM) { - bool Changed = false; - SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(F); +} // end anonymous namespace - if (CBRs.empty()) - return 
PreservedAnalyses::all(); +INITIALIZE_PASS_BEGIN(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts", + false, false) - auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); +FunctionPass *llvm::createInlineAsmPreparePass() { + return new InlineAsmPrepare(); +} - Changed |= SplitCriticalEdges(CBRs, DT); - Changed |= InsertIntrinsicCalls(CBRs, DT); +//===----------------------------------------------------------------------===// +// Process InlineAsm instructions +//===----------------------------------------------------------------------===// - if (!Changed) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserve<DominatorTreeAnalysis>(); - return PA; +/// The inline asm constraint allows both register and memory. +static bool isRegMemConstraint(StringRef Constraint) { + return Constraint.size() == 2 && (Constraint == "rm" || Constraint == "mr"); } -char InlineAsmPrepare::ID = 0; -INITIALIZE_PASS_BEGIN(InlineAsmPrepare, "inline-asm-prepare", - "Prepare inline asm insts", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(InlineAsmPrepare, "inline-asm-prepare", - "Prepare inline asm insts", false, false) +/// Tag "rm" output constraints with '*' to signify that they default to a +/// memory location. 
+static std::tuple<std::string, bool, bool> +convertConstraintsToMemory(StringRef ConstraintStr) { + std::vector<std::string> Constraints; + Constraints.reserve(ConstraintStr.count(',') + 1); + + std::istringstream OS(ConstraintStr.str()); + std::string Constraint; + while (std::getline(OS, Constraint, ',')) + Constraints.push_back(Constraint); + + bool HasRegMem = false; + bool MayWriteMem = false; + for (auto &Constraint : Constraints) { + std::string NewConstraint; + + auto I = Constraint.begin(), E = Constraint.end(); + bool HasIndirect = false; + + if (*I == '=') { + if (Constraint.size() == 1) + return {}; + ++I; + NewConstraint += '='; + } + if (*I == '*') { + if (Constraint.size() == 1) + return {}; + ++I; + NewConstraint += '*'; + HasIndirect = true; + } + if (*I == '+') { + if (Constraint.size() == 1) + return {}; + ++I; + NewConstraint += '+'; + } -FunctionPass *llvm::createInlineAsmPreparePass() { - return new InlineAsmPrepare(); + if (isRegMemConstraint(std::string(I, E))) { + HasRegMem = true; + MayWriteMem = true; + if (!HasIndirect) + NewConstraint += '*'; + } + + NewConstraint += std::string(I, E); + Constraint = NewConstraint; + } + + return {llvm::join(Constraints, ","), HasRegMem, MayWriteMem}; } -void InlineAsmPrepare::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<DominatorTreeWrapperPass>(); +/// Build a map of tied constraints. TiedOutput[i] = j means Constraint i is an +/// input tied to output constraint j. 
+static void +buildTiedConstraintMap(const InlineAsm::ConstraintInfoVector &Constraints, + SmallVectorImpl<int> &TiedOutput) { + for (unsigned I = 0, E = Constraints.size(); I != E; ++I) { + const InlineAsm::ConstraintInfo &C = Constraints[I]; + if (C.Type == InlineAsm::isOutput && C.hasMatchingInput()) { + int InputIdx = C.MatchingInput; + if (InputIdx >= 0 && InputIdx < (int)Constraints.size()) + TiedOutput[InputIdx] = I; + } + + if (C.Type == InlineAsm::isInput && C.hasMatchingInput()) { + int OutputIdx = C.MatchingInput; + if (OutputIdx >= 0 && OutputIdx < (int)Constraints.size()) + TiedOutput[I] = OutputIdx; + } + } } -SmallVector<CallBrInst *, 2> FindCallBrs(Function &F) { - SmallVector<CallBrInst *, 2> CBRs; - for (BasicBlock &BB : F) - if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator())) - if (!CBR->getType()->isVoidTy() && !CBR->use_empty()) - CBRs.push_back(CBR); - return CBRs; +/// Process an output constraint, creating allocas for converted constraints. +static void processOutputConstraint( + const InlineAsm::ConstraintInfo &C, Type *RetTy, unsigned OutputIdx, + IRBuilder<> &EntryBuilder, SmallVectorImpl<Value *> &NewArgs, + SmallVectorImpl<Type *> &NewRetTypes, + SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs, + SmallVectorImpl<std::pair<AllocaInst *, Type *>> &OutputAllocas, + unsigned ConstraintIdx) { + Type *SlotTy = RetTy; + if (StructType *ST = dyn_cast<StructType>(RetTy)) + SlotTy = ST->getElementType(OutputIdx); + + if (C.hasRegMemConstraints()) { + // Converted to memory constraint. Create alloca and pass pointer as + // argument. + AllocaInst *Slot = EntryBuilder.CreateAlloca(SlotTy, nullptr, "asm_mem"); + NewArgs.push_back(Slot); + ElementTypeAttrs.push_back({NewArgs.size() - 1, SlotTy}); + OutputAllocas[ConstraintIdx] = std::make_pair(Slot, SlotTy); + // No return value for this output since it's now an out-parameter. + } else { + // Unchanged, still an output return value. 
+ NewRetTypes.push_back(SlotTy); + } } -bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) { - bool Changed = false; - CriticalEdgeSplittingOptions Options(&DT); - Options.setMergeIdenticalEdges(); +/// Process an input constraint, handling tied constraints and conversions. +static void processInputConstraint( + const InlineAsm::ConstraintInfo &C, Value *ArgVal, Type *InputElementType, + ArrayRef<int> TiedOutput, + ArrayRef<std::pair<AllocaInst *, Type *>> OutputAllocas, + unsigned ConstraintIdx, IRBuilder<> &Builder, IRBuilder<> &EntryBuilder, + SmallVectorImpl<Value *> &NewArgs, + SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs) { + Type *ArgTy = ArgVal->getType(); + + if (TiedOutput[ConstraintIdx] != -1) { + int MatchIdx = TiedOutput[ConstraintIdx]; + if (auto [Slot, _] = OutputAllocas[MatchIdx]; Slot) { + // The matched output was converted to memory. Store this input into the + // alloca. + Builder.CreateStore(ArgVal, Slot); + + // Pass the alloca pointer as the argument, instead of ArgVal. This + // ensures the tied "0" constraint matches the "*m" output. + NewArgs.push_back(Slot); + return; + } + } - // The indirect destination might be duplicated between another parameter... - // %0 = callbr ... [label %x, label %x] - // ...hence MergeIdenticalEdges and AllowIndentical edges, but we don't need - // to split the default destination if it's duplicated between an indirect - // destination... - // %1 = callbr ... to label %x [label %x] - // ...hence starting at 1 and checking against successor 0 (aka the default - // destination). - for (CallBrInst *CBR : CBRs) - for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i) - if (CBR->getSuccessor(i) == CBR->getSuccessor(0) || - isCriticalEdge(CBR, i, /*AllowIdenticalEdges*/ true)) - if (SplitKnownCriticalEdge(CBR, i, Options)) - Changed = true; - return Changed; + if (C.hasRegMemConstraints() && !C.isIndirect) { + // Converted to memory constraint. 
Create alloca, store input, pass pointer + // as argument. + AllocaInst *Slot = EntryBuilder.CreateAlloca(ArgTy, nullptr, "asm_mem"); + Builder.CreateStore(ArgVal, Slot); + NewArgs.push_back(Slot); + ElementTypeAttrs.push_back({NewArgs.size() - 1, ArgTy}); + } else { + // Unchanged + NewArgs.push_back(ArgVal); + if (InputElementType) + ElementTypeAttrs.push_back({NewArgs.size() - 1, InputElementType}); + } } -bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) { - bool Changed = false; - SmallPtrSet<const BasicBlock *, 4> Visited; - IRBuilder<> Builder(CBRs[0]->getContext()); - for (CallBrInst *CBR : CBRs) { - if (!CBR->getNumIndirectDests()) - continue; +/// Build the return type from the collected return types. +static Type *buildReturnType(ArrayRef<Type *> NewRetTypes, + LLVMContext &Context) { + if (NewRetTypes.empty()) + return Type::getVoidTy(Context); + + if (NewRetTypes.size() == 1) + return NewRetTypes[0]; - SSAUpdater SSAUpdate; - SSAUpdate.Initialize(CBR->getType(), CBR->getName()); - SSAUpdate.AddAvailableValue(CBR->getParent(), CBR); - SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR); + return StructType::get(Context, NewRetTypes); +} + +/// Create the new inline assembly call with converted constraints. 
+static CallInst * +createNewInlineAsm(InlineAsm *IA, const std::string &NewConstraintStr, + Type *NewRetTy, ArrayRef<Value *> NewArgs, + ArrayRef<std::pair<unsigned, Type *>> ElementTypeAttrs, + CallBase *CB, bool MayWriteMem, IRBuilder<> &Builder, + LLVMContext &Context) { + SmallVector<Type *> NewArgTypes; + for (const auto *NewArg : NewArgs) + NewArgTypes.push_back(NewArg->getType()); + + FunctionType *NewFTy = FunctionType::get(NewRetTy, NewArgTypes, false); + InlineAsm *NewIA = InlineAsm::get( + NewFTy, IA->getAsmString(), NewConstraintStr, IA->hasSideEffects(), + IA->isAlignStack(), IA->getDialect(), IA->canThrow()); + + CallInst *NewCall = Builder.CreateCall(NewFTy, NewIA, NewArgs); + NewCall->setCallingConv(CB->getCallingConv()); + NewCall->setAttributes(CB->getAttributes()); + NewCall->setDebugLoc(CB->getDebugLoc()); + NewCall->setMemoryEffects(MayWriteMem ? MemoryEffects::writeOnly() + : MemoryEffects::readOnly()); ---------------- nikic wrote:
This isn't right. What I'd expect to happen here is that you take the original memory effects and then add an `argMemOnly(ModRefInfo::Mod)` effect to it if you introduced an indirect output (or the same with Ref for an input). (The memory attributes should be tested in the IR test.)

https://github.com/llvm/llvm-project/pull/181973
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
