Title: [274179] trunk/PerformanceTests
Revision
274179
Author
[email protected]
Date
2021-03-09 15:08:41 -0800 (Tue, 09 Mar 2021)

Log Message

MotionMark scores are super sensitive to a single long frame
https://bugs.webkit.org/show_bug.cgi?id=220847
<rdar://problem/74152743>

Reviewed by Jon Lee.

Currently, "ramp" tests have three phases. The middle phase is where they try to determine a maximum reasonable
complexity, and the third one is where they try various complexities between 0 and the maximum. The calculation
of this maximum reasonable complexity is currently very sensitive to outlier frame times. If there is a single
outlier frame time, the failure mode is to assume that the maximum complexity is ~10. So, the solution is to
ignore outlier frame times during this middle phase, and to ensure that there are at least 9 frames measured that
have non-outlier times.

This patch also changes the speed of the middle phase. Previously, each interval during this phase had
a complexity that was 3.16x of the previous complexity. This patch changes that to 1.78x of the previous
complexity for complexities above 50, and 1.33x for complexities above 10,000.

* MotionMark/tests/resources/main.js:
(filterOutOutliers):
(_measureAndResetInterval):
(update):
(registerFrameTime):
(intervalHasConcluded):
(start):
(didFinishInterval):

Modified Paths

Diff

Modified: trunk/PerformanceTests/ChangeLog (274178 => 274179)


--- trunk/PerformanceTests/ChangeLog	2021-03-09 23:07:19 UTC (rev 274178)
+++ trunk/PerformanceTests/ChangeLog	2021-03-09 23:08:41 UTC (rev 274179)
@@ -1,3 +1,31 @@
+2021-03-09  Myles C. Maxfield  <[email protected]>
+
+        MotionMark scores are super sensitive to a single long frame
+        https://bugs.webkit.org/show_bug.cgi?id=220847
+        <rdar://problem/74152743>
+
+        Reviewed by Jon Lee.
+
+        Currently, "ramp" tests have three phases. The middle phase is where they try to determine a maximum reasonable
+        complexity, and the third one is where they try various complexities between 0 and the maximum. The calculation
+        of this maximum reasonable complexity is currently very sensitive to outlier frame times. If there is a single
+        outlier frame time, the failure mode is to assume that the maximum complexity is ~10. So, the solution is to
+        ignore outlier frame times during this middle phase, and to ensure that there are at least 9 frames measured that
+        have non-outlier times.
+
+        This patch also changes the speed of the middle phase. Previously, each interval during this phase had
+        a complexity that was 3.16x of the previous complexity. This patch changes that to 1.78x of the previous
+        complexity for complexities above 50, and 1.33x for complexities above 10,000.
+
+        * MotionMark/tests/resources/main.js:
+        (filterOutOutliers):
+        (_measureAndResetInterval):
+        (update):
+        (registerFrameTime):
+        (intervalHasConcluded):
+        (start):
+        (didFinishInterval):
+
 2021-02-26  Zalan Bujtas  <[email protected]>
 
         [Performance test][Line layout] Add test with inline boxes

Modified: trunk/PerformanceTests/MotionMark/tests/resources/main.js (274178 => 274179)


--- trunk/PerformanceTests/MotionMark/tests/resources/main.js	2021-03-09 23:07:19 UTC (rev 274178)
+++ trunk/PerformanceTests/MotionMark/tests/resources/main.js	2021-03-09 23:08:41 UTC (rev 274179)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015-2020 Apple Inc. All rights reserved.
+ * Copyright (C) 2015-2021 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -110,6 +110,20 @@
         return comment in this._marks;
     },
 
+    filterOutOutliers: function(array)
+    {
+        if (array.length == 0)
+            return [];
+
+        array.sort();
+        var q1 = array[Math.min(Math.round(array.length * 1 / 4), array.length - 1)];
+        var q3 = array[Math.min(Math.round(array.length * 3 / 4), array.length - 1)];
+        var interquartileRange = q3 - q1;
+        var minimum = q1 - interquartileRange * 1.5;
+        var maximum = q3 + interquartileRange * 1.5;
+        return array.filter(x => x >= minimum && x <= maximum);
+    },
+
     _measureAndResetInterval: function(currentTimestamp)
     {
         var sampleCount = this._sampler.sampleCount;
@@ -116,8 +130,13 @@
         var averageFrameLength = 0;
 
         if (this._intervalEndTimestamp) {
-            var intervalStartTimestamp = this._sampler.samples[0][this._intervalStartIndex];
-            averageFrameLength = (currentTimestamp - intervalStartTimestamp) / (sampleCount - this._intervalStartIndex);
+            var durations = [];
+            for (var i = Math.max(this._intervalStartIndex, 1); i < sampleCount; ++i) {
+                durations.push(this._sampler.samples[0][i] - this._sampler.samples[0][i - 1]);
+            }
+            var filteredDurations = this.filterOutOutliers(durations);
+            if (filteredDurations.length > 0)
+                averageFrameLength = filteredDurations.reduce((a, b) => a + b, 0) / filteredDurations.length;
         }
 
         this._intervalStartIndex = sampleCount;
@@ -138,15 +157,19 @@
                 this._frameLengthEstimator.sample(lastFrameLength);
                 frameLengthEstimate = this._frameLengthEstimator.estimate;
             }
-        } else if (timestamp >= this._intervalEndTimestamp) {
-            var intervalStartTimestamp = this._sampler.samples[0][this._intervalStartIndex];
-            intervalAverageFrameLength = this._measureAndResetInterval(timestamp);
-            if (this._isFrameLengthEstimatorEnabled) {
-                this._frameLengthEstimator.sample(intervalAverageFrameLength);
-                frameLengthEstimate = this._frameLengthEstimator.estimate;
+        } else {
+            this.registerFrameTime(lastFrameLength);
+            if (this.intervalHasConcluded(timestamp)) {
+                var intervalStartTimestamp = this._sampler.samples[0][this._intervalStartIndex];
+                intervalAverageFrameLength = this._measureAndResetInterval(timestamp);
+                if (this._isFrameLengthEstimatorEnabled) {
+                    this._frameLengthEstimator.sample(intervalAverageFrameLength);
+                    frameLengthEstimate = this._frameLengthEstimator.estimate;
+                }
+                didFinishInterval = true;
+                this.didFinishInterval(timestamp, stage, intervalAverageFrameLength);
+                this._frameLengthEstimator.reset();
             }
-            didFinishInterval = true;
-            this.didFinishInterval(timestamp, stage, intervalAverageFrameLength);
         }
 
         this._sampler.record(timestamp, stage.complexity(), frameLengthEstimate);
@@ -153,6 +176,15 @@
         this.tune(timestamp, stage, lastFrameLength, didFinishInterval, intervalAverageFrameLength);
     },
 
+    registerFrameTime: function(lastFrameLength) // Hook called with the last frame length before the interval-end check; no-op here.
+    {
+    },
+
+    intervalHasConcluded: function(timestamp) // Returns true once |timestamp| has reached this._intervalEndTimestamp.
+    {
+        return timestamp >= this._intervalEndTimestamp;
+    },
+
     didFinishInterval: function(timestamp, stage, intervalAverageFrameLength)
     {
     },
@@ -336,6 +368,8 @@
     tierFastTestLength: 250,
     // If the engine is under stress, let the test run a little longer to let the measurement settle
     tierSlowTestLength: 750,
+    // Tier intervals must have this number of non-outlier frames in order to end.
+    numberOfFramesRequiredInInterval: 9,
 
     rampWarmupLength: 200,
 
@@ -355,10 +389,25 @@
         Controller.prototype.start.call(this, startTimestamp, stage);
         this._rampStartTimestamp = 0;
         this.intervalSamplingLength = 100;
+        this._frameTimeHistory = [];
     },
 
+    registerFrameTime: function(lastFrameLength) // Records the frame length in this._frameTimeHistory.
+    {
+        this._frameTimeHistory.push(lastFrameLength);
+    },
+
+    intervalHasConcluded: function(timestamp)
+    {
+        if (!Controller.prototype.intervalHasConcluded.call(this, timestamp))
+            return false;
+        // Until tier sampling has finished, the interval also needs enough non-outlier frame times in the history.
+        return this._finishedTierSampling || this.filterOutOutliers(this._frameTimeHistory).length > this.numberOfFramesRequiredInInterval; // NOTE(review): strict '>' needs one more frame than the constant - confirm intent.
+    },
+
     didFinishInterval: function(timestamp, stage, intervalAverageFrameLength)
     {
+        this._frameTimeHistory = [];
         if (!this._finishedTierSampling) {
             if (this._tierStartTimestamp > 0 && timestamp < this._tierStartTimestamp + this.tierFastTestLength)
                 return;
@@ -378,9 +427,14 @@
                 this._lastTierComplexity = currentComplexity;
                 this._lastTierFrameLength = currentFrameLength;
 
-                this._tier += .5;
+                if (currentComplexity <= 50)
+                    this._tier += 1/2;
+                else if (currentComplexity <= 10000)
+                    this._tier += 1/4;
+                else
+                    this._tier += 1/8;
                 this._endTimestamp = timestamp + this._testLength;
-                var nextTierComplexity = Math.round(Math.pow(10, this._tier));
+                var nextTierComplexity = Math.max(Math.round(Math.pow(10, this._tier)), currentComplexity + 1);
                 stage.tune(nextTierComplexity - currentComplexity);
 
                 // Some tests may be unable to go beyond a certain capacity. If so, don't keep moving up tiers
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to