================
@@ -1,204 +1,464 @@
-//===-- InlineAsmPrepare - Prepare inline asm for code gen ----------------===//
+//===-- InlineAsmPrepare - Prepare inline asm for code generation ---------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass lowers callbrs in LLVM IR in order to to assist SelectionDAG's
-// codegen.
+// This pass lowers callbrs and inline asm in LLVM IR in order to assist
+// SelectionDAG's codegen.
 //
-// In particular, this pass assists in inserting register copies for the output
-// values of a callbr along the edges leading to the indirect target blocks.
-// Though the output SSA value is defined by the callbr instruction itself in
-// the IR representation, the value cannot be copied to the appropriate virtual
-// registers prior to jumping to an indirect label, since the jump occurs
-// within the user-provided assembly blob.
+// CallBrInst:
 //
-// Instead, those copies must occur separately at the beginning of each
-// indirect target. That requires that we create a separate SSA definition in
-// each of them (via llvm.callbr.landingpad), and may require splitting
-// critical edges so we have a location to place the intrinsic. Finally, we
-// remap users of the original callbr output SSA value to instead point to the
-// appropriate llvm.callbr.landingpad value.
+//   - Assists in inserting register copies for the output values of a callbr
+//     along the edges leading to the indirect target blocks. Though the output
+//     SSA value is defined by the callbr instruction itself in the IR
+//     representation, the value cannot be copied to the appropriate virtual
+//     registers prior to jumping to an indirect label, since the jump occurs
+//     within the user-provided assembly blob.
 //
-// Ideally, this could be done inside SelectionDAG, or in the
-// MachineInstruction representation, without the use of an IR-level intrinsic.
-// But, within the current framework, it’s simpler to implement as an IR pass.
-// (If support for callbr in GlobalISel is implemented, it’s worth considering
-// whether this is still required.)
+//     Instead, those copies must occur separately at the beginning of each
+//     indirect target. That requires that we create a separate SSA definition
+//     in each of them (via llvm.callbr.landingpad), and may require splitting
+//     critical edges so we have a location to place the intrinsic. Finally, we
+//     remap users of the original callbr output SSA value to instead point to
+//     the appropriate llvm.callbr.landingpad value.
+//
+//     Ideally, this could be done inside SelectionDAG, or in the
+//     MachineInstruction representation, without the use of an IR-level
+//     intrinsic.  But, within the current framework, it’s simpler to implement
+//     as an IR pass.  (If support for callbr in GlobalISel is implemented,
+//     it’s worth considering whether this is still required.)
+//
+// InlineAsm:
+//
+//   - Prepares inline assembly for code generation with the fast register
+//     allocator. In particular, it defaults "rm" (register-or-memory) to
+//     prefer the "m" constraints (the front-end opts for the "r" constraint),
+//     simplifying register allocation by forcing operands to memory locations.
+//     The other register allocators are equipped to handle folding registers
+//     already, so don't need to change the default.
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/InlineAsmPrepare.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/iterator.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/BasicBlock.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <sstream>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "inline-asm-prepare"
 
-static bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
-static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
-                                 DominatorTree &DT);
-static void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
-                      SSAUpdater &SSAUpdate);
-static SmallVector<CallBrInst *, 2> FindCallBrs(Function &F);
-
 namespace {
 
 class InlineAsmPrepare : public FunctionPass {
 public:
   InlineAsmPrepare() : FunctionPass(ID) {}
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetPassConfig>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+  }
   bool runOnFunction(Function &F) override;
+
   static char ID;
 };
 
-} // end anonymous namespace
+char InlineAsmPrepare::ID = 0;
 
-PreservedAnalyses InlineAsmPreparePass::run(Function &F,
-                                            FunctionAnalysisManager &FAM) {
-  bool Changed = false;
-  SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(F);
+} // end anonymous namespace
 
-  if (CBRs.empty())
-    return PreservedAnalyses::all();
+INITIALIZE_PASS_BEGIN(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts",
+                    false, false)
 
-  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+FunctionPass *llvm::createInlineAsmPreparePass() {
+  return new InlineAsmPrepare();
+}
 
-  Changed |= SplitCriticalEdges(CBRs, DT);
-  Changed |= InsertIntrinsicCalls(CBRs, DT);
+//===----------------------------------------------------------------------===//
+//                     Process InlineAsm instructions
+//===----------------------------------------------------------------------===//
 
-  if (!Changed)
-    return PreservedAnalyses::all();
-  PreservedAnalyses PA;
-  PA.preserve<DominatorTreeAnalysis>();
-  return PA;
+/// The inline asm constraint allows both register and memory.
+static bool isRegMemConstraint(StringRef Constraint) {
+  return Constraint.size() == 2 && (Constraint == "rm" || Constraint == "mr");
 }
 
-char InlineAsmPrepare::ID = 0;
-INITIALIZE_PASS_BEGIN(InlineAsmPrepare, "inline-asm-prepare",
-                      "Prepare inline asm insts", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(InlineAsmPrepare, "inline-asm-prepare",
-                    "Prepare inline asm insts", false, false)
+/// Tag "rm" output constraints with '*' to signify that they default to a
+/// memory location.
+static std::tuple<std::string, bool, bool>
+convertConstraintsToMemory(StringRef ConstraintStr) {
+  std::vector<std::string> Constraints;
+  Constraints.reserve(ConstraintStr.count(',') + 1);
+
+  std::istringstream OS(ConstraintStr.str());
+  std::string Constraint;
+  while (std::getline(OS, Constraint, ','))
+    Constraints.push_back(Constraint);
+
+  bool HasRegMem = false;
+  bool MayWriteMem = false;
+  for (auto &Constraint : Constraints) {
+    std::string NewConstraint;
+
+    auto I = Constraint.begin(), E = Constraint.end();
+    bool HasIndirect = false;
+
+    if (*I == '=') {
+      if (Constraint.size() == 1)
+        return {};
+      ++I;
+      NewConstraint += '=';
+    }
+    if (*I == '*') {
+      if (Constraint.size() == 1)
+        return {};
+      ++I;
+      NewConstraint += '*';
+      HasIndirect = true;
+    }
+    if (*I == '+') {
+      if (Constraint.size() == 1)
+        return {};
+      ++I;
+      NewConstraint += '+';
+    }
 
-FunctionPass *llvm::createInlineAsmPreparePass() {
-  return new InlineAsmPrepare();
+    if (isRegMemConstraint(std::string(I, E))) {
+      HasRegMem = true;
+      MayWriteMem = true;
+      if (!HasIndirect)
+        NewConstraint += '*';
+    }
+
+    NewConstraint += std::string(I, E);
+    Constraint = NewConstraint;
+  }
+
+  return {llvm::join(Constraints, ","), HasRegMem, MayWriteMem};
 }
 
-void InlineAsmPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addPreserved<DominatorTreeWrapperPass>();
+/// Build a map of tied constraints. TiedOutput[i] = j means Constraint i is an
+/// input tied to output constraint j.
+static void
+buildTiedConstraintMap(const InlineAsm::ConstraintInfoVector &Constraints,
+                       SmallVectorImpl<int> &TiedOutput) {
+  for (unsigned I = 0, E = Constraints.size(); I != E; ++I) {
+    const InlineAsm::ConstraintInfo &C = Constraints[I];
+    if (C.Type == InlineAsm::isOutput && C.hasMatchingInput()) {
+      int InputIdx = C.MatchingInput;
+      if (InputIdx >= 0 && InputIdx < (int)Constraints.size())
+        TiedOutput[InputIdx] = I;
+    }
+
+    if (C.Type == InlineAsm::isInput && C.hasMatchingInput()) {
+      int OutputIdx = C.MatchingInput;
+      if (OutputIdx >= 0 && OutputIdx < (int)Constraints.size())
+        TiedOutput[I] = OutputIdx;
+    }
+  }
 }
 
-SmallVector<CallBrInst *, 2> FindCallBrs(Function &F) {
-  SmallVector<CallBrInst *, 2> CBRs;
-  for (BasicBlock &BB : F)
-    if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator()))
-      if (!CBR->getType()->isVoidTy() && !CBR->use_empty())
-        CBRs.push_back(CBR);
-  return CBRs;
+/// Process an output constraint, creating allocas for converted constraints.
+static void processOutputConstraint(
+    const InlineAsm::ConstraintInfo &C, Type *RetTy, unsigned OutputIdx,
+    IRBuilder<> &EntryBuilder, SmallVectorImpl<Value *> &NewArgs,
+    SmallVectorImpl<Type *> &NewRetTypes,
+    SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs,
+    SmallVectorImpl<std::pair<AllocaInst *, Type *>> &OutputAllocas,
+    unsigned ConstraintIdx) {
+  Type *SlotTy = RetTy;
+  if (StructType *ST = dyn_cast<StructType>(RetTy))
+    SlotTy = ST->getElementType(OutputIdx);
+
+  if (C.hasRegMemConstraints()) {
+    // Converted to memory constraint. Create alloca and pass pointer as
+    // argument.
+    AllocaInst *Slot = EntryBuilder.CreateAlloca(SlotTy, nullptr, "asm_mem");
+    NewArgs.push_back(Slot);
+    ElementTypeAttrs.push_back({NewArgs.size() - 1, SlotTy});
+    OutputAllocas[ConstraintIdx] = std::make_pair(Slot, SlotTy);
+    // No return value for this output since it's now an out-parameter.
+  } else {
+    // Unchanged, still an output return value.
+    NewRetTypes.push_back(SlotTy);
+  }
 }
 
-bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) {
-  bool Changed = false;
-  CriticalEdgeSplittingOptions Options(&DT);
-  Options.setMergeIdenticalEdges();
+/// Process an input constraint, handling tied constraints and conversions.
+static void processInputConstraint(
+    const InlineAsm::ConstraintInfo &C, Value *ArgVal, Type *InputElementType,
+    ArrayRef<int> TiedOutput,
+    ArrayRef<std::pair<AllocaInst *, Type *>> OutputAllocas,
+    unsigned ConstraintIdx, IRBuilder<> &Builder, IRBuilder<> &EntryBuilder,
+    SmallVectorImpl<Value *> &NewArgs,
+    SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs) {
+  Type *ArgTy = ArgVal->getType();
+
+  if (TiedOutput[ConstraintIdx] != -1) {
+    int MatchIdx = TiedOutput[ConstraintIdx];
+    if (auto [Slot, _] = OutputAllocas[MatchIdx]; Slot) {
+      // The matched output was converted to memory. Store this input into the
+      // alloca.
+      Builder.CreateStore(ArgVal, Slot);
+
+      // Pass the alloca pointer as the argument, instead of ArgVal. This
+      // ensures the tied "0" constraint matches the "*m" output.
+      NewArgs.push_back(Slot);
+      return;
+    }
+  }
 
-  // The indirect destination might be duplicated between another parameter...
-  //   %0 = callbr ... [label %x, label %x]
-  // ...hence MergeIdenticalEdges and AllowIndentical edges, but we don't need
-  // to split the default destination if it's duplicated between an indirect
-  // destination...
-  //   %1 = callbr ... to label %x [label %x]
-  // ...hence starting at 1 and checking against successor 0 (aka the default
-  // destination).
-  for (CallBrInst *CBR : CBRs)
-    for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i)
-      if (CBR->getSuccessor(i) == CBR->getSuccessor(0) ||
-          isCriticalEdge(CBR, i, /*AllowIdenticalEdges*/ true))
-        if (SplitKnownCriticalEdge(CBR, i, Options))
-          Changed = true;
-  return Changed;
+  if (C.hasRegMemConstraints() && !C.isIndirect) {
+    // Converted to memory constraint. Create alloca, store input, pass pointer
+    // as argument.
+    AllocaInst *Slot = EntryBuilder.CreateAlloca(ArgTy, nullptr, "asm_mem");
+    Builder.CreateStore(ArgVal, Slot);
+    NewArgs.push_back(Slot);
+    ElementTypeAttrs.push_back({NewArgs.size() - 1, ArgTy});
+  } else {
+    // Unchanged
+    NewArgs.push_back(ArgVal);
+    if (InputElementType)
+      ElementTypeAttrs.push_back({NewArgs.size() - 1, InputElementType});
+  }
 }
 
-bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) {
-  bool Changed = false;
-  SmallPtrSet<const BasicBlock *, 4> Visited;
-  IRBuilder<> Builder(CBRs[0]->getContext());
-  for (CallBrInst *CBR : CBRs) {
-    if (!CBR->getNumIndirectDests())
-      continue;
+/// Build the return type from the collected return types.
+static Type *buildReturnType(ArrayRef<Type *> NewRetTypes,
+                             LLVMContext &Context) {
+  if (NewRetTypes.empty())
+    return Type::getVoidTy(Context);
+
+  if (NewRetTypes.size() == 1)
+    return NewRetTypes[0];
 
-    SSAUpdater SSAUpdate;
-    SSAUpdate.Initialize(CBR->getType(), CBR->getName());
-    SSAUpdate.AddAvailableValue(CBR->getParent(), CBR);
-    SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR);
+  return StructType::get(Context, NewRetTypes);
+}
+
+/// Create the new inline assembly call with converted constraints.
+static CallInst *
+createNewInlineAsm(InlineAsm *IA, const std::string &NewConstraintStr,
+                   Type *NewRetTy, ArrayRef<Value *> NewArgs,
+                   ArrayRef<std::pair<unsigned, Type *>> ElementTypeAttrs,
+                   CallBase *CB, bool MayWriteMem, IRBuilder<> &Builder,
+                   LLVMContext &Context) {
+  SmallVector<Type *> NewArgTypes;
+  for (const auto *NewArg : NewArgs)
+    NewArgTypes.push_back(NewArg->getType());
+
+  FunctionType *NewFTy = FunctionType::get(NewRetTy, NewArgTypes, false);
+  InlineAsm *NewIA = InlineAsm::get(
+      NewFTy, IA->getAsmString(), NewConstraintStr, IA->hasSideEffects(),
+      IA->isAlignStack(), IA->getDialect(), IA->canThrow());
+
+  CallInst *NewCall = Builder.CreateCall(NewFTy, NewIA, NewArgs);
+  NewCall->setCallingConv(CB->getCallingConv());
+  NewCall->setAttributes(CB->getAttributes());
+  NewCall->setDebugLoc(CB->getDebugLoc());
+  NewCall->setMemoryEffects(MayWriteMem ? MemoryEffects::writeOnly()
+                                        : MemoryEffects::readOnly());
----------------
nikic wrote:

This isn't right. What I'd expect to happen here is that you take the original 
memory effects and then add an `argMemOnly(ModRefInfo::Mod)` effect to it if 
you introduced an indirect output (or same with Ref for an input).

(The memory attributes should be tested in the IR test.)

https://github.com/llvm/llvm-project/pull/181973
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to