Modified: trunk/Source/JavaScriptCore/bytecode/CodeBlock.cpp (203328 => 203329)
--- trunk/Source/JavaScriptCore/bytecode/CodeBlock.cpp 2016-07-16 21:33:01 UTC (rev 203328)
+++ trunk/Source/JavaScriptCore/bytecode/CodeBlock.cpp 2016-07-17 00:19:41 UTC (rev 203329)
@@ -3693,16 +3693,68 @@
double CodeBlock::optimizationThresholdScalingFactor()
{
- // We want a good threshold based on the instruction count.
- // Here, we are trying to optimize the following formula:
- // F[x_] =: a * Sqrt[x + b] + Abs[c * x] + d
- // The parameters were chosen by testing random values
- // between 1 and 2 and keeping the best combination.
- const double a = Options::optimizationThresholdScalingFactorA();
- const double b = Options::optimizationThresholdScalingFactorB();
- const double c = Options::optimizationThresholdScalingFactorC();
- const double d = Options::optimizationThresholdScalingFactorD();
-
+ // This expression arises from doing a least-squares fit of
+ //
+ // F[x_] =: a * Sqrt[x + b] + Abs[c * x] + d
+ //
+ // against the data points:
+ //
+ // x F[x_]
+ // 10 0.9 (smallest reasonable code block)
+ // 200 1.0 (typical small-ish code block)
+ // 320 1.2 (something I saw in 3d-cube that I wanted to optimize)
+ // 1268 5.0 (something I saw in 3d-cube that I didn't want to optimize)
+ // 4000 5.5 (random large size, used to cause the function to converge to a shallow curve of some sort)
+ // 10000 6.0 (similar to above)
+ //
+ // I achieve the minimization using the following Mathematica code:
+ //
+ // MyFunctionTemplate[x_, a_, b_, c_, d_] := a*Sqrt[x + b] + Abs[c*x] + d
+ //
+ // samples = {{10, 0.9}, {200, 1}, {320, 1.2}, {1268, 5}, {4000, 5.5}, {10000, 6}}
+ //
+ // solution =
+ // Minimize[Plus @@ ((MyFunctionTemplate[#[[1]], a, b, c, d] - #[[2]])^2 & /@ samples),
+ // {a, b, c, d}][[2]]
+ //
+ // And the code below (to initialize a, b, c, d) is generated by:
+ //
+ // Print["const double " <> ToString[#[[1]]] <> " = " <>
+ // If[#[[2]] < 0.00001, "0.0", ToString[#[[2]]]] <> ";"] & /@ solution
+ //
+ // We've long known the following to be true:
+ // - Small code blocks are cheap to optimize and so we should do it sooner rather
+ // than later.
+ // - Large code blocks are expensive to optimize and so we should postpone doing so,
+ // and sometimes have a large enough threshold that we never optimize them.
+ // - The difference in cost is not totally linear because (a) just invoking the
+ // DFG incurs some base cost and (b) for large code blocks there is enough slop
+ // in the correlation between instruction count and the actual compilation cost
+ // that for those large blocks, the instruction count should not have a strong
+ // influence on our threshold.
+ //
+ // I knew the goals but I didn't know how to achieve them; so I picked an interesting
+ // example where the heuristics were right (code block in 3d-cube with instruction
+ // count 320, which got compiled early as it should have been) and one where they were
+ // totally wrong (code block in 3d-cube with instruction count 1268, which was expensive
+ // to compile and didn't run often enough to warrant compilation in my opinion), and
+ // then threw in additional data points that represented my own guess of what our
+ // heuristics should do for some round-numbered examples.
+ //
+ // The expression to which I decided to fit the data arose because I started with an
+ // affine function, and then did two things: put the linear part in an Abs to ensure
+ // that the fit didn't end up choosing a negative value of c (which would result in
+ // the function turning over and going negative for large x) and I threw in a Sqrt
+ // term because Sqrt represents my intuition that the function should be more sensitive
+ // to small changes in small values of x, but less sensitive when x gets large.
+
+ // Note that the current fit essentially eliminates the linear portion of the
+ // expression (c == 0.0).
+ const double a = 0.061504;
+ const double b = 1.02406;
+ const double c = 0.0;
+ const double d = 0.825914;
+
double instructionCount = this->instructionCount();
ASSERT(instructionCount); // Make sure this is called only after we have an instruction stream; otherwise it'll just return the value of d, which makes no sense.
Modified: trunk/Source/JavaScriptCore/runtime/Options.h (203328 => 203329)
--- trunk/Source/JavaScriptCore/runtime/Options.h 2016-07-16 21:33:01 UTC (rev 203328)
+++ trunk/Source/JavaScriptCore/runtime/Options.h 2016-07-17 00:19:41 UTC (rev 203329)
@@ -98,44 +98,6 @@
unsigned m_highLimit;
};
-#if CPU(X86_64)
-constexpr int32_t archThresholdForJITAfterWarmUp = 610;
-constexpr int32_t archThresholdForJITSoon = 89;
-constexpr int32_t archThresholdForOptimizeAfterWarmUp = 864;
-constexpr int32_t archThresholdForOptimizeAfterLongWarmUp = 1489;
-constexpr int32_t archThresholdForOptimizeSoon = 864;
-constexpr int32_t archExecutionCounterIncrementForLoop = 2;
-constexpr int32_t archExecutionCounterIncrementForEntry = 16;
-constexpr int32_t archThresholdForFTLOptimizeAfterWarmUp = 109160;
-constexpr int32_t archThresholdForFTLOptimizeSoon = 825;
-constexpr int32_t archFtlTierUpCounterIncrementForLoop = 8;
-constexpr int32_t archFtlTierUpCounterIncrementForReturn = 27;
-constexpr unsigned archFtlOSREntryRetryThreshold = 109;
-constexpr double archOptimizationThresholdScalingFactorA = 0.0258587392234135;
-constexpr double archOptimizationThresholdScalingFactorB = 1.2428014544978696;
-constexpr double archOptimizationThresholdScalingFactorC = 0.0013276440240339;
-constexpr double archOptimizationThresholdScalingFactorD = 1.3130654609331458;
-constexpr int32_t archEvalThresholdMultiplier = 12;
-#else
-constexpr int32_t archThresholdForJITAfterWarmUp = 500;
-constexpr int32_t archThresholdForJITSoon = 100;
-constexpr int32_t archThresholdForOptimizeAfterWarmUp = 1000;
-constexpr int32_t archThresholdForOptimizeAfterLongWarmUp = 1000;
-constexpr int32_t archThresholdForOptimizeSoon = 1000;
-constexpr int32_t archExecutionCounterIncrementForLoop = 1;
-constexpr int32_t archExecutionCounterIncrementForEntry = 15;
-constexpr int32_t archThresholdForFTLOptimizeAfterWarmUp = 100000;
-constexpr int32_t archThresholdForFTLOptimizeSoon = 1000;
-constexpr int32_t archFtlTierUpCounterIncrementForLoop = 1;
-constexpr int32_t archFtlTierUpCounterIncrementForReturn = 15;
-constexpr unsigned archFtlOSREntryRetryThreshold = 100;
-constexpr double archOptimizationThresholdScalingFactorA = 0.061504;
-constexpr double archOptimizationThresholdScalingFactorB = 1.02406;
-constexpr double archOptimizationThresholdScalingFactorC = 0.0;
-constexpr double archOptimizationThresholdScalingFactorD = 0.825914;
-constexpr int32_t archEvalThresholdMultiplier = 10;
-#endif
-
typedef OptionRange optionRange;
typedef const char* optionString;
@@ -291,28 +253,23 @@
\
v(double, jitPolicyScale, 1.0, Normal, "scale JIT thresholds to this specified ratio between 0.0 (compile ASAP) and 1.0 (compile like normal).") \
v(bool, forceEagerCompilation, false, Normal, nullptr) \
- v(int32, thresholdForJITAfterWarmUp, archThresholdForJITAfterWarmUp, Normal, nullptr) \
- v(int32, thresholdForJITSoon, archThresholdForJITSoon, Normal, nullptr) \
+ v(int32, thresholdForJITAfterWarmUp, 500, Normal, nullptr) \
+ v(int32, thresholdForJITSoon, 100, Normal, nullptr) \
\
- v(int32, thresholdForOptimizeAfterWarmUp, archThresholdForOptimizeAfterWarmUp, Normal, nullptr) \
- v(int32, thresholdForOptimizeAfterLongWarmUp, archThresholdForOptimizeAfterLongWarmUp, Normal, nullptr) \
- v(int32, thresholdForOptimizeSoon, archThresholdForOptimizeSoon, Normal, nullptr) \
- v(int32, executionCounterIncrementForLoop, archExecutionCounterIncrementForLoop, Normal, nullptr) \
- v(int32, executionCounterIncrementForEntry, archExecutionCounterIncrementForEntry, Normal, nullptr) \
+ v(int32, thresholdForOptimizeAfterWarmUp, 1000, Normal, nullptr) \
+ v(int32, thresholdForOptimizeAfterLongWarmUp, 1000, Normal, nullptr) \
+ v(int32, thresholdForOptimizeSoon, 1000, Normal, nullptr) \
+ v(int32, executionCounterIncrementForLoop, 1, Normal, nullptr) \
+ v(int32, executionCounterIncrementForEntry, 15, Normal, nullptr) \
\
- v(int32, thresholdForFTLOptimizeAfterWarmUp, archThresholdForFTLOptimizeAfterWarmUp, Normal, nullptr) \
- v(int32, thresholdForFTLOptimizeSoon, archThresholdForFTLOptimizeSoon, Normal, nullptr) \
- v(int32, ftlTierUpCounterIncrementForLoop, archFtlTierUpCounterIncrementForLoop, Normal, nullptr) \
- v(int32, ftlTierUpCounterIncrementForReturn, archFtlTierUpCounterIncrementForReturn, Normal, nullptr) \
+ v(int32, thresholdForFTLOptimizeAfterWarmUp, 100000, Normal, nullptr) \
+ v(int32, thresholdForFTLOptimizeSoon, 1000, Normal, nullptr) \
+ v(int32, ftlTierUpCounterIncrementForLoop, 1, Normal, nullptr) \
+ v(int32, ftlTierUpCounterIncrementForReturn, 15, Normal, nullptr) \
v(unsigned, ftlOSREntryFailureCountForReoptimization, 15, Normal, nullptr) \
- v(unsigned, ftlOSREntryRetryThreshold, archFtlOSREntryRetryThreshold, Normal, nullptr) \
+ v(unsigned, ftlOSREntryRetryThreshold, 100, Normal, nullptr) \
\
- v(double, optimizationThresholdScalingFactorA, archOptimizationThresholdScalingFactorA, Normal, nullptr) \
- v(double, optimizationThresholdScalingFactorB, archOptimizationThresholdScalingFactorB, Normal, nullptr) \
- v(double, optimizationThresholdScalingFactorC, archOptimizationThresholdScalingFactorC, Normal, nullptr) \
- v(double, optimizationThresholdScalingFactorD, archOptimizationThresholdScalingFactorD, Normal, nullptr) \
- \
- v(int32, evalThresholdMultiplier, archEvalThresholdMultiplier, Normal, nullptr) \
+ v(int32, evalThresholdMultiplier, 10, Normal, nullptr) \
v(unsigned, maximumEvalCacheableSourceLength, 256, Normal, nullptr) \
\
v(bool, randomizeExecutionCountsBetweenCheckpoints, false, Normal, nullptr) \
Modified: trunk/Tools/Scripts/run-jsc-stress-tests (203328 => 203329)
--- trunk/Tools/Scripts/run-jsc-stress-tests 2016-07-16 21:33:01 UTC (rev 203328)
+++ trunk/Tools/Scripts/run-jsc-stress-tests 2016-07-17 00:19:41 UTC (rev 203329)
@@ -429,7 +429,7 @@
# We force all tests to use a smaller (1.5M) stack so that stack overflow tests can run faster.
BASE_OPTIONS = ["--useFTLJIT=false", "--useFunctionDotArguments=true", "--maxPerThreadStackUsage=1572864"]
EAGER_OPTIONS = ["--thresholdForJITAfterWarmUp=10", "--thresholdForJITSoon=10", "--thresholdForOptimizeAfterWarmUp=20", "--thresholdForOptimizeAfterLongWarmUp=20", "--thresholdForOptimizeSoon=20", "--thresholdForFTLOptimizeAfterWarmUp=20", "--thresholdForFTLOptimizeSoon=20", "--maximumEvalCacheableSourceLength=150000"]
-NO_CJIT_OPTIONS = ["--useConcurrentJIT=false", "--thresholdForJITAfterWarmUp=100", "--thresholdForJITSoon=100", "--thresholdForOptimizeAfterWarmUp=1000", "--thresholdForOptimizeAfterLongWarmUp=1000", "--thresholdForOptimizeSoon=1000", "--executionCounterIncrementForLoop=1", "--executionCounterIncrementForEntry=15", "--thresholdForFTLOptimizeAfterWarmUp=100000", "--thresholdForFTLOptimizeSoon=1000", "--ftlTierUpCounterIncrementForLoop=1", "--ftlTierUpCounterIncrementForReturn=15", "--evalThresholdMultiplier=10", "--optimizationThresholdScalingFactorA=0.061504", "--optimizationThresholdScalingFactorB=1.02406", "--optimizationThresholdScalingFactorC=0.0", "--optimizationThresholdScalingFactorD=0.825914"]
+NO_CJIT_OPTIONS = ["--useConcurrentJIT=false", "--thresholdForJITAfterWarmUp=100"]
FTL_OPTIONS = ["--useFTLJIT=true"]
$runlist = []