================
@@ -1,171 +1,425 @@
-//===-- InlineAsmPrepare - Prepare inline asm for code gen ----------------===//
+//===-- InlineAsmPrepare - Prepare inline asm for code generation ---------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass lowers callbrs in LLVM IR in order to to assist SelectionDAG's
-// codegen.
+// This pass lowers callbrs and inline asm in LLVM IR in order to assist
+// SelectionDAG's codegen.
 //
-// In particular, this pass assists in inserting register copies for the output
-// values of a callbr along the edges leading to the indirect target blocks.
-// Though the output SSA value is defined by the callbr instruction itself in
-// the IR representation, the value cannot be copied to the appropriate virtual
-// registers prior to jumping to an indirect label, since the jump occurs
-// within the user-provided assembly blob.
+// CallBrInst:
 //
-// Instead, those copies must occur separately at the beginning of each
-// indirect target. That requires that we create a separate SSA definition in
-// each of them (via llvm.callbr.landingpad), and may require splitting
-// critical edges so we have a location to place the intrinsic. Finally, we
-// remap users of the original callbr output SSA value to instead point to the
-// appropriate llvm.callbr.landingpad value.
+//   - Assists in inserting register copies for the output values of a callbr
+//     along the edges leading to the indirect target blocks. Though the output
+//     SSA value is defined by the callbr instruction itself in the IR
+//     representation, the value cannot be copied to the appropriate virtual
+//     registers prior to jumping to an indirect label, since the jump occurs
+//     within the user-provided assembly blob.
 //
-// Ideally, this could be done inside SelectionDAG, or in the
-// MachineInstruction representation, without the use of an IR-level intrinsic.
-// But, within the current framework, it’s simpler to implement as an IR pass.
-// (If support for callbr in GlobalISel is implemented, it’s worth considering
-// whether this is still required.)
+//     Instead, those copies must occur separately at the beginning of each
+//     indirect target. That requires that we create a separate SSA definition
+//     in each of them (via llvm.callbr.landingpad), and may require splitting
+//     critical edges so we have a location to place the intrinsic. Finally, we
+//     remap users of the original callbr output SSA value to instead point to
+//     the appropriate llvm.callbr.landingpad value.
+//
+//     Ideally, this could be done inside SelectionDAG, or in the
+//     MachineInstruction representation, without the use of an IR-level
+//     intrinsic.  But, within the current framework, it’s simpler to implement
+//     as an IR pass.  (If support for callbr in GlobalISel is implemented,
+//     it’s worth considering whether this is still required.)
+//
+// InlineAsm:
+//
+//   - Prepares inline assembly for code generation with the fast register
+//     allocator. In particular, it makes "rm" (register-or-memory) constraints
+//     prefer the "m" constraint (the front-end opts for the "r" constraint),
+//     simplifying register allocation by forcing operands to memory locations.
+//     The other register allocators are already equipped to handle folding
+//     registers, so they don't need the default changed.
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/InlineAsmPrepare.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/iterator.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/BasicBlock.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 
 using namespace llvm;
 
 #define DEBUG_TYPE "inline-asm-prepare"
 
-static bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
-static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
-                                 DominatorTree &DT);
-static void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
-                      SSAUpdater &SSAUpdate);
-static SmallVector<CallBrInst *, 2> FindCallBrs(Function &F);
-
 namespace {
 
 class InlineAsmPrepare : public FunctionPass {
 public:
   InlineAsmPrepare() : FunctionPass(ID) {}
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetPassConfig>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+  }
   bool runOnFunction(Function &F) override;
+
   static char ID;
 };
 
-} // end anonymous namespace
+char InlineAsmPrepare::ID = 0;
 
-PreservedAnalyses InlineAsmPreparePass::run(Function &F,
-                                            FunctionAnalysisManager &FAM) {
-  bool Changed = false;
-  SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(F);
+} // end anonymous namespace
 
-  if (CBRs.empty())
-    return PreservedAnalyses::all();
+INITIALIZE_PASS_BEGIN(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts",
+                    false, false)
 
-  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+FunctionPass *llvm::createInlineAsmPreparePass() {
+  return new InlineAsmPrepare();
+}
 
-  Changed |= SplitCriticalEdges(CBRs, DT);
-  Changed |= InsertIntrinsicCalls(CBRs, DT);
+//===----------------------------------------------------------------------===//
+//                     Process InlineAsm instructions
+//===----------------------------------------------------------------------===//
 
-  if (!Changed)
-    return PreservedAnalyses::all();
-  PreservedAnalyses PA;
-  PA.preserve<DominatorTreeAnalysis>();
-  return PA;
+/// The inline asm constraint allows both register and memory.
+static bool IsRegMemConstraint(StringRef Constraint) {
+  return Constraint.size() == 2 && (Constraint == "rm" || Constraint == "mr");
 }
 
-char InlineAsmPrepare::ID = 0;
-INITIALIZE_PASS_BEGIN(InlineAsmPrepare, "inline-asm-prepare",
-                      "Prepare inline asm insts", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(InlineAsmPrepare, "inline-asm-prepare",
-                    "Prepare inline asm insts", false, false)
+/// Tag "rm" output constraints with '*' to signify that they default to a
+/// memory location.
+static std::pair<std::string, bool>
+ConvertConstraintsToMemory(StringRef ConstraintStr) {
+  auto I = ConstraintStr.begin(), E = ConstraintStr.end();
+  std::string Out;
+  raw_string_ostream O(Out);
+  bool HasRegMem = false;
+
+  while (I != E) {
+    bool IsOutput = false;
+    bool HasIndirect = false;
+    if (*I == '=') {
+      O << *I;
+      IsOutput = true;
+      ++I;
+      if (I == E)
+        return {};
+    }
+    if (*I == '*') {
+      O << '*';
+      HasIndirect = true;
+      ++I;
+      if (I == E)
+        return {};
+    }
+    if (*I == '+') {
+      O << '+';
+      IsOutput = true;
+      ++I;
+      if (I == E)
+        return {};
+    }
 
-FunctionPass *llvm::createInlineAsmPreparePass() {
-  return new InlineAsmPrepare();
+    auto Comma = std::find(I, E, ',');
+    std::string Sub(I, Comma);
+    if (IsRegMemConstraint(Sub)) {
+      HasRegMem = true;
+      if (IsOutput && !HasIndirect)
+        O << '*';
+    }
+
+    O << Sub;
+
+    if (Comma == E)
+      break;
+
+    O << ',';
+    I = Comma + 1;
+  }
+
+  return {Out, HasRegMem};
 }
 
-void InlineAsmPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addPreserved<DominatorTreeWrapperPass>();
+/// Build a map of tied constraints. TiedOutput[i] = j means Constraint i is an
+/// input tied to output constraint j.
+static void
+BuildTiedConstraintMap(const InlineAsm::ConstraintInfoVector &Constraints,
+                       SmallVectorImpl<int> &TiedOutput) {
+  for (unsigned I = 0, E = Constraints.size(); I != E; ++I) {
+    const InlineAsm::ConstraintInfo &C = Constraints[I];
+    if (C.Type == InlineAsm::isOutput && C.hasMatchingInput()) {
+      int InputIdx = C.MatchingInput;
+      if (InputIdx >= 0 && InputIdx < (int)Constraints.size())
+        TiedOutput[InputIdx] = I;
+    }
+
+    if (C.Type == InlineAsm::isInput && C.hasMatchingInput()) {
+      int OutputIdx = C.MatchingInput;
+      if (OutputIdx >= 0 && OutputIdx < (int)Constraints.size())
+        TiedOutput[I] = OutputIdx;
+    }
+  }
 }
 
-SmallVector<CallBrInst *, 2> FindCallBrs(Function &F) {
-  SmallVector<CallBrInst *, 2> CBRs;
-  for (BasicBlock &BB : F)
-    if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator()))
-      if (!CBR->getType()->isVoidTy() && !CBR->use_empty())
-        CBRs.push_back(CBR);
-  return CBRs;
+/// Process an output constraint, creating allocas for converted constraints.
+static void ProcessOutputConstraint(
+    const InlineAsm::ConstraintInfo &C, Type *RetTy, unsigned OutputIdx,
+    IRBuilder<> &EntryBuilder, SmallVectorImpl<Value *> &NewArgs,
+    SmallVectorImpl<Type *> &NewArgTypes, SmallVectorImpl<Type *> &NewRetTypes,
+    SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs,
+    SmallVectorImpl<AllocaInst *> &OutputAllocas, unsigned ConstraintIdx) {
+  Type *SlotTy = RetTy;
+  if (StructType *ST = dyn_cast<StructType>(RetTy))
+    SlotTy = ST->getElementType(OutputIdx);
+
+  if (C.hasRegMemConstraints()) {
+    // Converted to memory constraint. Create alloca and pass pointer as
+    // argument.
+    AllocaInst *Slot = EntryBuilder.CreateAlloca(SlotTy, nullptr, "asm_mem");
+    NewArgs.push_back(Slot);
+    NewArgTypes.push_back(Slot->getType());
+    ElementTypeAttrs.push_back({NewArgs.size() - 1, SlotTy});
+    OutputAllocas[ConstraintIdx] = Slot;
+    // No return value for this output since it's now an out-parameter.
+  } else {
+    // Unchanged, still an output return value.
+    NewRetTypes.push_back(SlotTy);
+  }
 }
 
-bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) {
-  bool Changed = false;
-  CriticalEdgeSplittingOptions Options(&DT);
-  Options.setMergeIdenticalEdges();
+/// Process an input constraint, handling tied constraints and conversions.
+static void ProcessInputConstraint(const InlineAsm::ConstraintInfo &C,
+                                   Value *ArgVal, ArrayRef<int> TiedOutput,
+                                   ArrayRef<AllocaInst *> OutputAllocas,
+                                   unsigned ConstraintIdx, IRBuilder<> &Builder,
+                                   IRBuilder<> &EntryBuilder,
+                                   SmallVectorImpl<Value *> &NewArgs,
+                                   SmallVectorImpl<Type *> &NewArgTypes) {
+  Type *ArgTy = ArgVal->getType();
+
+  if (TiedOutput[ConstraintIdx] != -1) {
+    int MatchIdx = TiedOutput[ConstraintIdx];
+    if (AllocaInst *Slot = OutputAllocas[MatchIdx]) {
+      // The matched output was converted to memory. Store this input into the
+      // alloca.
+      Builder.CreateStore(ArgVal, Slot);
+
+      // Pass the alloca pointer as the argument, instead of ArgVal. This
+      // ensures the tied "0" constraint matches the "*m" output.
+      NewArgs.push_back(Slot);
+      NewArgTypes.push_back(Slot->getType());
+      return;
+    }
+  }
 
-  // The indirect destination might be duplicated between another parameter...
-  //   %0 = callbr ... [label %x, label %x]
-  // ...hence MergeIdenticalEdges and AllowIndentical edges, but we don't need
-  // to split the default destination if it's duplicated between an indirect
-  // destination...
-  //   %1 = callbr ... to label %x [label %x]
-  // ...hence starting at 1 and checking against successor 0 (aka the default
-  // destination).
-  for (CallBrInst *CBR : CBRs)
-    for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i)
-      if (CBR->getSuccessor(i) == CBR->getSuccessor(0) ||
-          isCriticalEdge(CBR, i, /*AllowIdenticalEdges*/ true))
-        if (SplitKnownCriticalEdge(CBR, i, Options))
-          Changed = true;
-  return Changed;
+  if (C.hasRegMemConstraints()) {
+    // Converted to memory constraint. Create alloca, store input, pass pointer
+    // as argument.
+    AllocaInst *Slot = EntryBuilder.CreateAlloca(ArgTy, nullptr, "asm_mem");
+    Builder.CreateStore(ArgVal, Slot);
+    NewArgs.push_back(Slot);
+    NewArgTypes.push_back(Slot->getType());
+  } else {
+    // Unchanged
+    NewArgs.push_back(ArgVal);
+    NewArgTypes.push_back(ArgTy);
+  }
 }
 
-bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) {
-  bool Changed = false;
-  SmallPtrSet<const BasicBlock *, 4> Visited;
-  IRBuilder<> Builder(CBRs[0]->getContext());
-  for (CallBrInst *CBR : CBRs) {
-    if (!CBR->getNumIndirectDests())
-      continue;
+/// Build the return type from the collected return types.
+static Type *BuildReturnType(ArrayRef<Type *> NewRetTypes,
+                             LLVMContext &Context) {
+  if (NewRetTypes.empty())
+    return Type::getVoidTy(Context);
 
-    SSAUpdater SSAUpdate;
-    SSAUpdate.Initialize(CBR->getType(), CBR->getName());
-    SSAUpdate.AddAvailableValue(CBR->getParent(), CBR);
-    SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR);
+  if (NewRetTypes.size() == 1)
+    return NewRetTypes[0];
+
+  return StructType::get(Context, NewRetTypes);
+}
 
-    for (BasicBlock *IndDest : CBR->getIndirectDests()) {
-      if (!Visited.insert(IndDest).second)
+/// Create the new inline assembly call with converted constraints.
+static CallInst *CreateNewInlineAsm(
+    InlineAsm *IA, const std::string &NewConstraintStr, Type *NewRetTy,
+    const SmallVectorImpl<Type *> &NewArgTypes,
+    const SmallVectorImpl<Value *> &NewArgs,
+    const SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs,
+    CallBase *CB, IRBuilder<> &Builder, LLVMContext &Context) {
+  FunctionType *NewFTy = FunctionType::get(NewRetTy, NewArgTypes, false);
+  InlineAsm *NewIA = InlineAsm::get(
+      NewFTy, IA->getAsmString(), NewConstraintStr, IA->hasSideEffects(),
+      IA->isAlignStack(), IA->getDialect(), IA->canThrow());
+
+  CallInst *NewCall = Builder.CreateCall(NewFTy, NewIA, NewArgs);
+  NewCall->setCallingConv(CB->getCallingConv());
+  NewCall->setAttributes(CB->getAttributes());
+  NewCall->setDebugLoc(CB->getDebugLoc());
+
+  for (const auto &[Index, Ty] : ElementTypeAttrs)
+    NewCall->addParamAttr(Index,
+                          Attribute::get(Context, Attribute::ElementType, Ty));
+
+  return NewCall;
+}
+
+/// Reconstruct the return value from the new call and allocas.
+static Value *
+ReconstructReturnValue(Type *RetTy, CallInst *NewCall,
+                       const InlineAsm::ConstraintInfoVector &Constraints,
+                       const SmallVectorImpl<AllocaInst *> &OutputAllocas,
+                       const SmallVectorImpl<Type *> &NewRetTypes,
+                       IRBuilder<> &Builder) {
+  if (RetTy->isVoidTy())
+    return nullptr;
+
+  if (isa<StructType>(RetTy)) {
+    // Multiple outputs. Reconstruct the struct.
+    Value *Res = PoisonValue::get(RetTy);
+    unsigned NewRetIdx = 0;
+    unsigned OriginalOutIdx = 0;
+
+    for (unsigned I = 0, E = Constraints.size(); I != E; ++I) {
+      if (Constraints[I].Type != InlineAsm::isOutput)
         continue;
-      Builder.SetInsertPoint(&*IndDest->begin());
-      CallInst *Intrinsic = Builder.CreateIntrinsic(
-          CBR->getType(), Intrinsic::callbr_landingpad, {CBR});
-      SSAUpdate.AddAvailableValue(IndDest, Intrinsic);
-      UpdateSSA(DT, CBR, Intrinsic, SSAUpdate);
-      Changed = true;
+
+      Value *Val = nullptr;
+      if (AllocaInst *Slot = OutputAllocas[I]) {
+        // Converted to memory. Load from alloca.
+        Val = Builder.CreateLoad(Slot->getAllocatedType(), Slot);
----------------
nikic wrote:

Avoid getAllocatedType(), which is in the process of being removed. If we can't 
get it from elsewhere, make OutputAllocas an `std::pair<AllocaInst *, Type *>`.

https://github.com/llvm/llvm-project/pull/181973
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to