Author: Aditya Kumar Date: 2020-12-18T17:06:17-08:00 New Revision: 1ab4db0f847fa1ddd394dbf54a5051b626eab160
URL: https://github.com/llvm/llvm-project/commit/1ab4db0f847fa1ddd394dbf54a5051b626eab160 DIFF: https://github.com/llvm/llvm-project/commit/1ab4db0f847fa1ddd394dbf54a5051b626eab160.diff LOG: [HotColdSplit] Reflect full cost of parameters in split penalty Make the penalty for splitting a region more accurately reflect the cost of materializing all of the inputs/outputs to/from the region. This almost entirely eliminates code growth within functions which undergo splitting in key internal frameworks, and reduces the size of those frameworks by 2.6% to 3%. rdar://49167240 Patch by: Vedant Kumar(@vsk) Reviewers: hiraditya,rjf,t.p.northover Reviewed By: hiraditya,rjf Differential Revision: https://reviews.llvm.org/D59715 Added: Modified: llvm/lib/Transforms/IPO/HotColdSplitting.cpp llvm/test/Transforms/CodeExtractor/extract-assume.ll llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll Removed: ################################################################################ diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index b25b789d1dae..aa708ee520b1 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -67,6 +67,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> +#include <limits> #include <cassert> #include <string> @@ -96,6 +97,10 @@ static cl::opt<std::string> cl::desc("Name for the section containing cold functions " "extracted by hot-cold splitting.")); +static cl::opt<int> MaxParametersForSplit( + "hotcoldsplit-max-params", cl::init(4), cl::Hidden, + cl::desc("Maximum number of parameters for a split function")); + namespace { // Same as blockEndsInUnreachable in 
CodeGen/BranchFolding.cpp. Do not modify // this function unless you modify the MBB version as well. @@ -257,18 +262,6 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region, if (SplittingThreshold <= 0) return Penalty; - // The typical code size cost for materializing an argument for the outlined - // call. - LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumInputs << " inputs\n"); - const int CostForArgMaterialization = TargetTransformInfo::TCC_Basic; - Penalty += CostForArgMaterialization * NumInputs; - - // The typical code size cost for an output alloca, its associated store, and - // its associated reload. - LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputs << " outputs\n"); - const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic; - Penalty += CostForRegionOutput * NumOutputs; - // Find the number of distinct exit blocks for the region. Use a conservative // check to determine whether control returns from the region. bool NoBlocksReturn = true; @@ -289,6 +282,48 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region, } } + // Count the number of phis in exit blocks with >= 2 incoming values from the + // outlining region. These phis are split (\ref severSplitPHINodesOfExits), + // and new outputs are created to supply the split phis. CodeExtractor can't + // report these new outputs until extraction begins, but it's important to + // factor the cost of the outputs into the cost calculation. + unsigned NumSplitExitPhis = 0; + for (BasicBlock *ExitBB : SuccsOutsideRegion) { + for (PHINode &PN : ExitBB->phis()) { + // Find all incoming values from the outlining region. + int NumIncomingVals = 0; + for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i) + if (find(Region, PN.getIncomingBlock(i)) != Region.end()) { + ++NumIncomingVals; + if (NumIncomingVals > 1) { + ++NumSplitExitPhis; + break; + } + } + } + } + + // Apply a penalty for calling the split function. 
Factor in the cost of + // materializing all of the parameters. + int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis; + int NumParams = NumInputs + NumOutputsAndSplitPhis; + if (NumParams > MaxParametersForSplit) { + LLVM_DEBUG(dbgs() << NumInputs << " inputs and " << NumOutputsAndSplitPhis + << " outputs exceeds parameter limit (" + << MaxParametersForSplit << ")\n"); + return std::numeric_limits<int>::max(); + } + const int CostForArgMaterialization = 2 * TargetTransformInfo::TCC_Basic; + LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumParams << " params\n"); + Penalty += CostForArgMaterialization * NumParams; + + // Apply the typical code size cost for an output alloca and its associated + // reload in the caller. Also penalize the associated store in the callee. + LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputsAndSplitPhis + << " outputs/split phis\n"); + const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic; + Penalty += CostForRegionOutput * NumOutputsAndSplitPhis; + // Apply a `noreturn` bonus. if (NoBlocksReturn) { LLVM_DEBUG(dbgs() << "Applying bonus for: " << Region.size() @@ -298,7 +333,7 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region, // Apply a penalty for having more than one successor outside of the region. // This penalty accounts for the switch needed in the caller. 
- if (!SuccsOutsideRegion.empty()) { + if (SuccsOutsideRegion.size() > 1) { LLVM_DEBUG(dbgs() << "Applying penalty for: " << SuccsOutsideRegion.size() << " non-region successors\n"); Penalty += (SuccsOutsideRegion.size() - 1) * TargetTransformInfo::TCC_Basic; diff --git a/llvm/test/Transforms/CodeExtractor/extract-assume.ll b/llvm/test/Transforms/CodeExtractor/extract-assume.ll index bf0d2ecb2d6b..ffba771856fb 100644 --- a/llvm/test/Transforms/CodeExtractor/extract-assume.ll +++ b/llvm/test/Transforms/CodeExtractor/extract-assume.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes="function(slp-vectorizer),module(hotcoldsplit),function(slp-vectorizer,print<assumptions>)" -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -passes="function(slp-vectorizer),module(hotcoldsplit),function(slp-vectorizer,print<assumptions>)" -hotcoldsplit-threshold=-1 -disable-output %s 2>&1 | FileCheck %s ; ; Make sure this compiles. Check that function assumption cache is refreshed ; after extracting blocks with assume calls from the function. 
diff --git a/llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll b/llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll index fffd6f9f5dcf..4906316816e9 100644 --- a/llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll +++ b/llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s +; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -hotcoldsplit-threshold=2 -hotcoldsplit-max-params=2 -S < %s -o /dev/null 2>&1 | FileCheck %s declare void @sink(i32*, i32, i32) cold @@ -10,10 +10,27 @@ define void @foo(i32 %arg) { br i1 undef, label %cold, label %exit cold: - ; CHECK: Applying penalty for: 2 inputs + ; CHECK: Applying penalty for splitting: 2 + ; CHECK-NEXT: Applying penalty for: 2 params + ; CHECK-NEXT: Applying penalty for: 0 outputs/split phis + ; CHECK-NEXT: penalty = 6 call void @sink(i32* @g, i32 %arg, i32 %local) ret void exit: ret void } + +define void @bar(i32* %p1, i32 %p2, i32 %p3) { + br i1 undef, label %cold, label %exit + +cold: + ; CHECK: Applying penalty for splitting: 2 + ; CHECK-NEXT: 3 inputs and 0 outputs exceeds parameter limit (2) + ; CHECK-NEXT: penalty = 2147483647 + call void @sink(i32* %p1, i32 %p2, i32 %p3) + ret void + +exit: + ret void +} diff --git a/llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll b/llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll index a7d9f97ab030..b7bf760b90c4 100644 --- a/llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll +++ b/llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s +; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -hotcoldsplit-threshold=2 -S < %s -o /dev/null 2>&1 | FileCheck %s declare void @sink() cold @@ -10,8 +10,10 @@ entry: br i1 undef, label %cold, label %exit 
cold: - ; CHECK: Applying penalty for: 1 output - ; CHECK: Applying penalty for: 1 non-region successors + ; CHECK: Applying penalty for splitting: 2 + ; CHECK-NEXT: Applying penalty for: 1 params + ; CHECK-NEXT: Applying penalty for: 1 outputs/split phis + ; CHECK-NEXT: penalty = 7 %local = load i32, i32* @g call void @sink() br label %exit diff --git a/llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll b/llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll index 3886d76da016..a9e3fc62e37c 100644 --- a/llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll +++ b/llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s +; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -hotcoldsplit-threshold=2 -S < %s -o /dev/null 2>&1 | FileCheck %s declare void @sink() cold @@ -9,7 +9,10 @@ entry: br i1 undef, label %cold1, label %exit cold1: - ; CHECK: Applying penalty for: 1 non-region successor + ; CHECK: Applying penalty for splitting: 2 + ; CHECK-NEXT: Applying penalty for: 0 params + ; CHECK-NEXT: Applying penalty for: 0 outputs/split phis + ; CHECK-NEXT: penalty = 2 call void @sink() br i1 undef, label %cold2, label %cold3 @@ -32,7 +35,11 @@ entry: br i1 undef, label %cold1, label %exit1 cold1: - ; CHECK: Applying penalty for: 2 non-region successors + ; CHECK: Applying penalty for splitting: 2 + ; CHECK-NEXT: Applying penalty for: 0 params + ; CHECK-NEXT: Applying penalty for: 0 outputs/split phis + ; CHECK-NEXT: Applying penalty for: 2 non-region successors + ; CHECK-NEXT: penalty = 3 call void @sink() br i1 undef, label %cold2, label %cold3 diff --git a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll index bdb46d584dcb..465d0e6add77 100644 --- a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll +++ 
b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -S -instsimplify -hotcoldsplit -debug < %s 2>&1 | FileCheck %s +; RUN: opt -S -instsimplify -hotcoldsplit -hotcoldsplit-threshold=-1 -debug < %s 2>&1 | FileCheck %s ; RUN: opt -instcombine -hotcoldsplit -instsimplify %s -o /dev/null target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -13,7 +13,10 @@ target triple = "aarch64" ; CHECK-NOT: @llvm.assume ; CHECK: } ; CHECK: declare {{.*}}@llvm.assume -; CHECK: define {{.*}}@f.cold.1(i64 %0) +; CHECK: define {{.*}}@f.cold.1() +; CHECK-LABEL: newFuncRoot: +; CHECK: } +; CHECK: define {{.*}}@f.cold.2(i64 %0) ; CHECK-LABEL: newFuncRoot: ; CHECK: %1 = icmp eq i64 %0, 0 ; CHECK-NOT: call void @llvm.assume _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits