avl updated this revision to Diff 273029.
avl added a comment.
removed usages of AllocaDerivedValueTracker from canTRE().
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D82085/new/
https://reviews.llvm.org/D82085
Files:
llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
llvm/test/Transforms/TailCallElim/basic.ll
llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll
Index: llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll
@@ -0,0 +1,69 @@
+; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
+
+; The IR for this test was generated from the following C++ source:
+;
+;int count;
+;__attribute__((noinline)) void globalIncrement(const int* param) { count += *param; }
+;
+;void test(int recurseCount)
+;{
+; if (recurseCount == 0) return;
+; int temp = 10;
+; globalIncrement(&temp);
+; test(recurseCount - 1);
+;}
+;
+
+@count = dso_local local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: nofree noinline norecurse nounwind uwtable
+define dso_local void @_Z15globalIncrementPKi(i32* nocapture readonly %param) local_unnamed_addr #0 {
+entry:
+ %0 = load i32, i32* %param, align 4
+ %1 = load i32, i32* @count, align 4
+ %add = add nsw i32 %1, %0
+ store i32 %add, i32* @count, align 4
+ ret void
+}
+
+; Test that TRE can be performed on a tail-recursive function that also
+; calls a function which receives a pointer to the local stack.
+
+; CHECK: void @_Z4testi
+; CHECK: br label %tailrecurse
+; CHECK: tailrecurse:
+; CHECK-NOT: call void @_Z4testi
+; CHECK: br label %tailrecurse
+; CHECK-NOT: call void @_Z4testi
+; CHECK: ret
+
+; Function Attrs: nounwind uwtable
+define dso_local void @_Z4testi(i32 %recurseCount) local_unnamed_addr #1 {
+entry:
+ %temp = alloca i32, align 4
+ %cmp = icmp eq i32 %recurseCount, 0
+ br i1 %cmp, label %return, label %if.end
+
+if.end: ; preds = %entry
+ %0 = bitcast i32* %temp to i8*
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #6
+ store i32 10, i32* %temp, align 4
+ call void @_Z15globalIncrementPKi(i32* nonnull %temp)
+ %sub = add nsw i32 %recurseCount, -1
+ call void @_Z4testi(i32 %sub)
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #6
+ br label %return
+
+return: ; preds = %entry, %if.end
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+attributes #0 = { nofree noinline norecurse nounwind uwtable }
+attributes #1 = { nounwind uwtable }
+attributes #2 = { argmemonly nounwind willreturn }
Index: llvm/test/Transforms/TailCallElim/basic.ll
===================================================================
--- llvm/test/Transforms/TailCallElim/basic.ll
+++ llvm/test/Transforms/TailCallElim/basic.ll
@@ -19,8 +19,8 @@
%A = alloca i32 ; <i32*> [#uses=2]
store i32 5, i32* %A
call void @use(i32* %A)
-; CHECK: tail call i32 @test1
- %X = tail call i32 @test1() ; <i32> [#uses=1]
+; CHECK: call i32 @test1
+ %X = call i32 @test1() ; <i32> [#uses=1]
ret i32 %X
}
Index: llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -89,16 +89,6 @@
STATISTIC(NumRetDuped, "Number of return duplicated");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
-/// Scan the specified function for alloca instructions.
-/// If it contains any dynamic allocas, returns false.
-static bool canTRE(Function &F) {
- // Because of PR962, we don't TRE dynamic allocas.
- return llvm::all_of(instructions(F), [](Instruction &I) {
- auto *AI = dyn_cast<AllocaInst>(&I);
- return !AI || AI->isStaticAlloca();
- });
-}
-
namespace {
struct AllocaDerivedValueTracker {
// Start at a root value and walk its use-def chain to mark calls that use the
@@ -185,11 +175,9 @@
};
}
-static bool markTails(Function &F, bool &AllCallsAreTailCalls,
- OptimizationRemarkEmitter *ORE) {
+static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
if (F.callsFunctionThatReturnsTwice())
return false;
- AllCallsAreTailCalls = true;
// The local stack holds all alloca instructions and all byval arguments.
AllocaDerivedValueTracker Tracker;
@@ -272,11 +260,8 @@
}
}
- if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
+ if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI))
DeferredTails.push_back(CI);
- } else {
- AllCallsAreTailCalls = false;
- }
}
for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
@@ -313,8 +298,6 @@
LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n");
CI->setTailCall();
Modified = true;
- } else {
- AllCallsAreTailCalls = false;
}
}
@@ -326,6 +309,14 @@
/// instructions between the call and this instruction are movable.
///
static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
+ if (isa<DbgInfoIntrinsic>(I))
+ return true;
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
+ II->getIntrinsicID() == Intrinsic::assume)
+ return true;
+
// FIXME: We can move load/store/call/free instructions above the call if the
// call does not mod/ref the memory location being processed.
if (I->mayHaveSideEffects()) // This also handles volatile loads.
@@ -366,8 +357,15 @@
(I->getOperand(0) != CI && I->getOperand(1) != CI))
return false;
+ Instruction *AccumulatorInstr = I;
+
+ if (AccumulatorInstr->hasOneUse() &&
+ isa<PHINode>(AccumulatorInstr->user_back()))
+ AccumulatorInstr = AccumulatorInstr->user_back();
+
// The only user of this instruction we allow is a single return instruction.
- if (!I->hasOneUse() || !isa<ReturnInst>(I->user_back()))
+ if (!AccumulatorInstr->hasOneUse() ||
+ !isa<ReturnInst>(AccumulatorInstr->user_back()))
return false;
return true;
@@ -392,7 +390,6 @@
// createTailRecurseLoopHeader the first time we find a call we can eliminate.
BasicBlock *HeaderBB = nullptr;
SmallVector<PHINode *, 8> ArgumentPHIs;
- bool RemovableCallsMustBeMarkedTail = false;
// PHI node to store our return value.
PHINode *RetPN = nullptr;
@@ -419,8 +416,7 @@
DomTreeUpdater &DTU)
: F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
- CallInst *findTRECandidate(Instruction *TI,
- bool CannotTailCallElimCallsMarkedTail);
+ CallInst *findTRECandidate(Instruction *TI);
void createTailRecurseLoopHeader(CallInst *CI);
@@ -428,14 +424,14 @@
bool eliminateCall(CallInst *CI);
- bool foldReturnAndProcessPred(ReturnInst *Ret,
- bool CannotTailCallElimCallsMarkedTail);
+ bool foldReturnAndProcessPred(ReturnInst *Ret);
- bool processReturningBlock(ReturnInst *Ret,
- bool CannotTailCallElimCallsMarkedTail);
+ bool processReturningBlock(ReturnInst *Ret);
void cleanupAndFinalize();
+ bool canTRE();
+
public:
static bool eliminate(Function &F, const TargetTransformInfo *TTI,
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
@@ -443,8 +439,7 @@
};
} // namespace
-CallInst *TailRecursionEliminator::findTRECandidate(
- Instruction *TI, bool CannotTailCallElimCallsMarkedTail) {
+CallInst *TailRecursionEliminator::findTRECandidate(Instruction *TI) {
BasicBlock *BB = TI->getParent();
if (&BB->front() == TI) // Make sure there is something before the terminator.
@@ -464,11 +459,6 @@
--BBI;
}
- // If this call is marked as a tail call, and if there are dynamic allocas in
- // the function, we cannot perform this optimization.
- if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
- return nullptr;
-
// As a special case, detect code like this:
// double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
// and disable this xform in this case, because the code generator will
@@ -498,26 +488,13 @@
BranchInst *BI = BranchInst::Create(HeaderBB, NewEntry);
BI->setDebugLoc(CI->getDebugLoc());
- // If this function has self recursive calls in the tail position where some
- // are marked tail and some are not, only transform one flavor or another.
- // We have to choose whether we move allocas in the entry block to the new
- // entry block or not, so we can't make a good choice for both. We make this
- // decision here based on whether the first call we found to remove is
- // marked tail.
- // NOTE: We could do slightly better here in the case that the function has
- // no entry block allocas.
- RemovableCallsMustBeMarkedTail = CI->isTailCall();
-
- // If this tail call is marked 'tail' and if there are any allocas in the
- // entry block, move them up to the new entry block.
- if (RemovableCallsMustBeMarkedTail)
- // Move all fixed sized allocas from HeaderBB to NewEntry.
- for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(),
- NEBI = NewEntry->begin();
- OEBI != E;)
- if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
- if (isa<ConstantInt>(AI->getArraySize()))
- AI->moveBefore(&*NEBI);
+ // Move all fixed sized allocas from HeaderBB to NewEntry.
+ for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(),
+ NEBI = NewEntry->begin();
+ OEBI != E;)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
+ if (isa<ConstantInt>(AI->getArraySize()))
+ AI->moveBefore(&*NEBI);
// Now that we have created a new block, which jumps to the entry
// block, insert a PHI node for each argument of the function.
@@ -620,9 +597,6 @@
if (!HeaderBB)
createTailRecurseLoopHeader(CI);
- if (RemovableCallsMustBeMarkedTail && !CI->isTailCall())
- return false;
-
// Ok, now that we know we have a pseudo-entry block WITH all of the
// required PHI nodes, add entries into the PHI node for the actual
// parameters passed into the tail-recursive call.
@@ -672,8 +646,7 @@
return true;
}
-bool TailRecursionEliminator::foldReturnAndProcessPred(
- ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) {
+bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) {
BasicBlock *BB = Ret->getParent();
bool Change = false;
@@ -698,8 +671,7 @@
while (!UncondBranchPreds.empty()) {
BranchInst *BI = UncondBranchPreds.pop_back_val();
BasicBlock *Pred = BI->getParent();
- if (CallInst *CI =
- findTRECandidate(BI, CannotTailCallElimCallsMarkedTail)) {
+ if (CallInst *CI = findTRECandidate(BI)) {
LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
<< "INTO UNCOND BRANCH PRED: " << *Pred);
FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU);
@@ -720,9 +692,8 @@
return Change;
}
-bool TailRecursionEliminator::processReturningBlock(
- ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) {
- CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
+bool TailRecursionEliminator::processReturningBlock(ReturnInst *Ret) {
+ CallInst *CI = findTRECandidate(Ret);
if (!CI)
return false;
@@ -801,6 +772,70 @@
}
}
+/// Returns true if this is a block that ends with a "return" or
+/// "unreachable" instruction, or with an unconditional branch to
+/// another block that contains a "return" instruction.
+static bool isTailBlock(BasicBlock *BB) {
+ Instruction *LastBlockInstr = BB->getTerminator();
+
+ // Check that the last instruction is a "return", an "unreachable", or an
+ // unconditional branch to another block containing a "return".
+ if (isa<ReturnInst>(LastBlockInstr))
+ return true;
+
+ if (isa<UnreachableInst>(LastBlockInstr))
+ return true;
+
+ if (BranchInst *Branch = dyn_cast<BranchInst>(LastBlockInstr)) {
+ if (Branch->isUnconditional())
+ if (isa<ReturnInst>(Branch->getSuccessor(0)->getFirstNonPHIOrDbg()))
+ return true;
+ }
+
+ return false;
+}
+
+/// Checks that the specified call instruction is in a position
+/// suitable for tail recursion elimination.
+static bool isInTREPosition(CallInst *Inst, AliasAnalysis *AA) {
+ if (!isTailBlock(Inst->getParent()))
+ return false;
+
+ BasicBlock::iterator BBI(Inst->getParent()->getTerminator());
+ for (--BBI; &*BBI != Inst; --BBI) {
+ if (!canMoveAboveCall(&*BBI, Inst, AA) &&
+ !canTransformAccumulatorRecursion(&*BBI, Inst))
+ return false;
+ }
+
+ return true;
+}
+
+bool TailRecursionEliminator::canTRE() {
+ for (auto &BB : F) {
+ for (Instruction &I : BB) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ // Because of PR962, we don't TRE dynamic allocas.
+ if (!AI->isStaticAlloca())
+ return false;
+ } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ if (CI->getCalledFunction() == &F) {
+ if (!isInTREPosition(CI, AA))
+ continue;
+
+ // Do not perform TRE if CI is explicitly marked as "notail", has operand
+ // bundles, or is not marked as a tail call.
+ if (CI->isNoTailCall() || CI->hasOperandBundles() ||
+ !CI->isTailCall())
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
bool TailRecursionEliminator::eliminate(Function &F,
const TargetTransformInfo *TTI,
AliasAnalysis *AA,
@@ -810,23 +845,18 @@
return false;
bool MadeChange = false;
- bool AllCallsAreTailCalls = false;
- MadeChange |= markTails(F, AllCallsAreTailCalls, ORE);
- if (!AllCallsAreTailCalls)
- return MadeChange;
+ MadeChange |= markTails(F, ORE);
// If this function is a varargs function, we won't be able to PHI the args
// right, so don't even try to convert it...
if (F.getFunctionType()->isVarArg())
return MadeChange;
- // If false, we cannot perform TRE on tail calls marked with the 'tail'
- // attribute, because doing so would cause the stack size to increase (real
- // TRE would deallocate variable sized allocas, TRE doesn't).
- bool CanTRETailMarkedCall = canTRE(F);
-
TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU);
+ if (!TRE.canTRE())
+ return MadeChange;
+
// Change any tail recursive calls to loops.
//
// FIXME: The code generator produces really bad code when an 'escaping
@@ -836,9 +866,9 @@
for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB.
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
- bool Change = TRE.processReturningBlock(Ret, !CanTRETailMarkedCall);
+ bool Change = TRE.processReturningBlock(Ret);
if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
- Change = TRE.foldReturnAndProcessPred(Ret, !CanTRETailMarkedCall);
+ Change = TRE.foldReturnAndProcessPred(Ret);
MadeChange |= Change;
}
}
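
For reference, a minimal sketch (not part of the patch) of the loop shape the new tre-noncapturing-alloca-calls.ll test expects after TRE, assuming the recursive call is rewritten into a branch back to the %tailrecurse header; block and value names other than %tailrecurse are illustrative, and the lifetime intrinsics are omitted for brevity:

define dso_local void @_Z4testi(i32 %recurseCount) local_unnamed_addr {
entry:
  %temp = alloca i32, align 4         ; static alloca stays in the entry block
  br label %tailrecurse

tailrecurse:                          ; the recursion becomes a loop header
  %recurseCount.tr = phi i32 [ %recurseCount, %entry ], [ %sub, %if.end ]
  %cmp = icmp eq i32 %recurseCount.tr, 0
  br i1 %cmp, label %return, label %if.end

if.end:
  store i32 10, i32* %temp, align 4
  call void @_Z15globalIncrementPKi(i32* nonnull %temp)
  %sub = add nsw i32 %recurseCount.tr, -1
  br label %tailrecurse               ; was: call void @_Z4testi(i32 %sub)

return:
  ret void
}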