Diff
Modified: trunk/Tools/ChangeLog (292774 => 292775)
--- trunk/Tools/ChangeLog 2022-04-12 17:02:46 UTC (rev 292774)
+++ trunk/Tools/ChangeLog 2022-04-12 17:04:48 UTC (rev 292775)
@@ -1,3 +1,38 @@
+2022-04-12 Angelos Oikonomopoulos <[email protected]>
+
+ [JSC] Report test flakiness to resultsdb
+ https://bugs.webkit.org/show_bug.cgi?id=238806
+
+ Reviewed by Jonathan Bedard.
+
+ Include flakiness info in the report sent to results.webkit.org.
+ This is a prerequisite to using --treat-failing-as-flaky in
+ post-commit JSC bots, as otherwise we'd lose any flakiness info
+ for tests that made it over the specified threshold.
+
+ For completeness, this patch includes the flakiness information
+ for both passed and failing tests.
+
+ It also bumps the hard iteration limit to 50 (a cap to the value
+ specified in --treat-failing-as-flaky). While here, add some
+ self-checking code that verifies the numbers add up. This needs
+ some minor refactoring in the tests to not declare an error when
+ the results are incomplete on purpose.
+
+ * Scripts/run-_javascript_core-tests:
+ (runJSCStressTests):
+ Propagate the flakiness information reported by run-jsc-stress
+ tests. Report known flakiness for both failing and passing tests.
+ * Scripts/run-jsc-stress-tests:
+ Bump iterationsCeiling. Ask the evaluator to
+ treatIncompleteAsFailed and perform validition.
+ * Scripts/webkitruby/jsc-stress-test/executor.rb:
+ Introduce and use acceptIncomplete for the test that needs it.
+ * Scripts/webkitruby/jsc-stress-test/test-result-evaluator.rb:
+ Introduce and use treatIncompleteAsFailed. Report both passed and
+ failed tests as flaky when they have been (subsequent to user
+ request) executed multiple times.
+
2022-04-12 Youenn Fablet <[email protected]>
Add persistent notification click API test
Modified: trunk/Tools/Scripts/run-_javascript_core-tests (292774 => 292775)
--- trunk/Tools/Scripts/run-_javascript_core-tests 2022-04-12 17:02:46 UTC (rev 292774)
+++ trunk/Tools/Scripts/run-_javascript_core-tests 2022-04-12 17:04:48 UTC (rev 292775)
@@ -40,6 +40,7 @@
use List::Util qw(min max);
use POSIX;
use webkitdirs;
+use Scalar::Util qw(looks_like_number);
use Text::ParseWords;
# determine configuration
@@ -946,21 +947,6 @@
$reportData{$testSucceeded} = {actual => "PASS"};
}
- my @jscStressFlaky = readAllLines($jscStressResultsDir . "/flaky");
- @jscStressFlaky = sort @jscStressFlaky;
- my $numJSCStressFlaky = @jscStressFlaky;
-
- my %jscStressFlakyInfo = ();
- if ($numJSCStressFlaky) {
- print "\n** The following JSC stress tests were flaky:\n";
- foreach my $testFlaky (@jscStressFlaky) {
- my @fields = split / /, $testFlaky;
- print "\t$fields[0] $fields[1]/$fields[2] passes\n";
- $jscStressFlakyInfo{$fields[0]} = { 'P' => $fields[1], 'T' => $fields[2]}
- }
- print "\n";
- }
-
my @jscStressFailList = readAllLines($jscStressResultsDir . "/failed");
@jscStressFailList = sort @jscStressFailList;
my $numJSCStressFailures = @jscStressFailList;
@@ -975,6 +961,32 @@
}
print "\n";
+ my @jscStressFlaky = readAllLines($jscStressResultsDir . "/flaky");
+ @jscStressFlaky = sort @jscStressFlaky;
+ my $numJSCStressFlaky = @jscStressFlaky;
+
+ my %jscStressFlakyInfo = ();
+
+ my $numJSCStressFlakyButPassed = 0;
+ if ($numJSCStressFlaky) {
+ print "\n** The following JSC stress tests were flaky:\n";
+ foreach my $testFlaky (@jscStressFlaky) {
+ my @fields = split / /, $testFlaky;
+ my $testName = $fields[0];
+ my $numPasses = $fields[1];
+ my $numTotal = $fields[2];
+ looks_like_number($fields[3]) || die "Malformed line: $testFlaky";
+ my $successful = int($fields[3]);
+ my $passFail = $successful ? "passed" : "failed";
+ print "\t$testName $fields[1]/$fields[2] passes, $passFail\n";
+ $jscStressFlakyInfo{$testName} = { 'P' => $numPasses, 'T' => $numTotal, 'S' => $successful };
+ # NB: the dict has been created above, we're only adding the flakiness field.
+ $reportData{$testName}{"flakiness_num_passes"} = int($numPasses);
+ $reportData{$testName}{"flakiness_num_tries"} = int($numTotal);
+ }
+ print "\n";
+ }
+
my @jscStressNoResultList = readAllLines($jscStressResultsDir . "/noresult");
my $numJSCStressNoResultTests = @jscStressNoResultList;
@@ -987,7 +999,8 @@
print "Results for JSC stress tests:\n";
printThingsFound($numJSCStressFailures, "failure", "failures", "found");
- printThingsFound($numJSCStressFlaky, "test", "tests", "flaky but passed");
+ printThingsFound($numJSCStressFlakyButPassed, "test", "tests", "flaky but passed");
+ printThingsFound($numJSCStressFlaky - $numJSCStressFlakyButPassed, "test", "tests", "flaky and failed");
printThingsFound($numJSCStressNoResultTests, "test", "tests", "failed to complete");
print " OK.\n" if $numJSCStressFailures == 0 and $numJSCStressNoResultTests == 0;
@@ -994,7 +1007,7 @@
print "\n";
if (defined($jsonFileName)) {
- $jsonData{'flakyAndPassed'} = \%jscStressFlakyInfo;
+ $jsonData{'flaky'} = \%jscStressFlakyInfo;
$jsonData{'stressTestFailures'} = \@jscStressFailList;
}
Modified: trunk/Tools/Scripts/run-jsc-stress-tests (292774 => 292775)
--- trunk/Tools/Scripts/run-jsc-stress-tests 2022-04-12 17:02:46 UTC (rev 292774)
+++ trunk/Tools/Scripts/run-jsc-stress-tests 2022-04-12 17:04:48 UTC (rev 292775)
@@ -84,7 +84,7 @@
PARALLEL_REMOTE_WRAPPER_MARK_END = "a9aea5c3b843"
PARALLEL_REMOTE_STATE_LOST_MARKER = "709fb7a77c45231918eb118a"
ITERATION_LIMITS = OpenStruct.new(:infraIterationsFloor => 3,
- :iterationsCeiling => 10)
+ :iterationsCeiling => 50)
REMOTE_TIMEOUT = 120
SSH_OPTIONS_DEFAULT = [
@@ -2813,7 +2813,8 @@
end
evaluator = TestResultEvaluatorFinal.new($runlist, statusMap)
- evaluator.visit!
+ evaluator.visit!({:treatIncompleteAsFailed => true})
+ evaluator.validate
if evaluator.noresult > 0
$stderr.puts("Could not get the exit status for #{evaluator.noresult} tests")
Modified: trunk/Tools/Scripts/webkitruby/jsc-stress-test/executor.rb (292774 => 292775)
--- trunk/Tools/Scripts/webkitruby/jsc-stress-test/executor.rb 2022-04-12 17:02:46 UTC (rev 292774)
+++ trunk/Tools/Scripts/webkitruby/jsc-stress-test/executor.rb 2022-04-12 17:04:48 UTC (rev 292775)
@@ -169,10 +169,18 @@
end
module ExecutorSelfTests
+ class TestCase
+ attr_reader :testsWithFlaky, :iterations, :expectedTestResults, :opts
+ def initialize(testsWithFlaky, iterations, expectedTestResults, opts={})
+ @testsWithFlaky = testsWithFlaky
+ @iterations = iterations
+ @expectedTestResults = expectedTestResults
+ @opts = opts
+ end
+ end
FinalTestResult = Struct.new(:result)
TestResult = Struct.new(:results)
Iteration = Struct.new(:testResults)
- TestCase = Struct.new(:testsWithFlaky, :iterations, :expectedTestResults, :treatFailingAsFlaky)
MOCK_TEST_FAMILY = "f"
ITERATION_LIMITS = OpenStruct.new(:infraIterationsFloor => 3,
:iterationsCeiling => 8)
@@ -302,9 +310,11 @@
Iteration.new([tre, trp]),
],
[ftrp, ftrp],
- OpenStruct.new(:passPercentage => 0.4,
- :maxTries => 6,
- :maxFailing => 3)),
+ {
+ :treatFailingAsFlaky => OpenStruct.new(:passPercentage => 0.4,
+ :maxTries => 6,
+ :maxFailing => 3)
+ }),
# Same, with different variability.
TestCase.new([nil, nil],
[
@@ -317,9 +327,11 @@
Iteration.new([tre, trf]),
],
[ftrp, ftrf],
- OpenStruct.new(:passPercentage => 0.4,
- :maxTries => 6,
- :maxFailing => 3)),
+ {
+ :treatFailingAsFlaky => OpenStruct.new(:passPercentage => 0.4,
+ :maxTries => 6,
+ :maxFailing => 3)
+ }),
# Test that a flaky for which not enough results come in is not marked as completed.
TestCase.new([nil],
[
@@ -332,9 +344,12 @@
Iteration.new([tre]),
],
[ftre],
- OpenStruct.new(:passPercentage => 0.49,
- :maxTries => 4,
- :maxFailing => 1)),
+ {
+ :treatFailingAsFlaky => OpenStruct.new(:passPercentage => 0.49,
+ :maxTries => 4,
+ :maxFailing => 1),
+ :acceptIncomplete => true,
+ }),
# Test that (statically) flaky tests aren't adjusted
TestCase.new([RetryParameters.new(0.5, 3), nil],
[
@@ -343,9 +358,11 @@
Iteration.new([trf, tre]),
],
[ftrf, ftrp],
- OpenStruct.new(:passPercentage => 0.7, # > 2/3
- :maxTries => 6,
- :maxFailing => 3)),
+ {
+ :treatFailingAsFlaky => OpenStruct.new(:passPercentage => 0.7, # > 2/3
+ :maxTries => 6,
+ :maxFailing => 3)
+ }),
# Test that we respect maxFailing.
TestCase.new([nil, nil, nil],
[
@@ -352,9 +369,11 @@
Iteration.new([trf, trf, trf]),
],
[ftrf, ftrf, ftrf],
- OpenStruct.new(:passPercentage => 0.7, # > 2/3
- :maxTries => 6,
- :maxFailing => 2)),
+ {
+ :treatFailingAsFlaky => OpenStruct.new(:passPercentage => 0.7, # > 2/3
+ :maxTries => 6,
+ :maxFailing => 2)
+ }),
# Test that we halt at maxIterations (3 + 5 = 8) if no results come in.
TestCase.new([RetryParameters.new(0.5, 5), nil],
[
@@ -378,7 +397,7 @@
}
putd("runlist: #{runlist.collect { |p| p.to_s }}, expected: #{testcase.expectedTestResults}")
executor = MockTestsExecutor.new(runlist, testcase.iterations,
- ITERATION_LIMITS, testcase.treatFailingAsFlaky,
+ ITERATION_LIMITS, testcase.opts[:treatFailingAsFlaky],
$testsDebugStream)
statusMap = executor.loop
if executor.executedIterations != testcase.iterations.size
@@ -392,7 +411,9 @@
evaluator = TestResultEvaluatorSimple.new(runlist, statusMap)
evaluator.visit!
-
+ if not testcase.opts[:acceptIncomplete]
+ evaluator.validate
+ end
verifyCompletedTestsMatchExpected(statusMap, evaluator.completed, expectCompleted)
testcaseFailed = false
Modified: trunk/Tools/Scripts/webkitruby/jsc-stress-test/test-result-evaluator.rb (292774 => 292775)
--- trunk/Tools/Scripts/webkitruby/jsc-stress-test/test-result-evaluator.rb 2022-04-12 17:02:46 UTC (rev 292774)
+++ trunk/Tools/Scripts/webkitruby/jsc-stress-test/test-result-evaluator.rb 2022-04-12 17:04:48 UTC (rev 292775)
@@ -35,11 +35,13 @@
end
def failed(plan)
end
- def wasFlakyAndPassed(plan, successes, tries)
+ def wasFlaky(plan, successes, tries, successful)
end
def visiting(plan)
end
- def visit!
+ def validate
+ end
+ def visit!(opts={})
@runlist.each {
|plan|
visiting(plan)
@@ -59,8 +61,14 @@
else
testWasSuccessful = plan.retryParameters.result(statuses)
if testWasSuccessful.nil?
- # Not completed yet
- next
+ if opts[:treatIncompleteAsFailed]
+ # Maximum number of iterations reached without a
+ # conclusive result; draw the developer's attention.
+ testWasSuccessful = false
+ else
+ # Not completed yet
+ next
+ end
end
end
if testWasSuccessful
@@ -72,8 +80,15 @@
if nresults < 1
$stderr.puts("Unexpected number of results: #{nresults} for plan #{plan}")
raise "Unexpected number of results"
- elsif nresults != 1 and testWasSuccessful
- wasFlakyAndPassed(plan, statuses.count(STATUS_FILE_PASS), statuses.size)
+ elsif nresults != 1 and not plan.retryParameters.nil?
+ # The flakiness stats are about diagnosing code issues, so only
+ # record flakiness when we've intentionally retried the test.
+ # Otherwise, multiple results might have come in because of
+ # transient infrastructure failures causing the test to be
+ # rescheduled on another host too. But then any failures might
+ # be because the machine was under load (causing the test to hit
+ # a timeout) or OOM'd.
+ wasFlaky(plan, statuses.count(STATUS_FILE_PASS), statuses.size, testWasSuccessful)
end
}
end
@@ -107,6 +122,14 @@
def missing
@noResult
end
+ def validate
+ counted = @testsPassed.size + @testsFailed.size + @noResult.size
+ if counted != @all.size
+ $stderr.puts("Accounting mismatch: expected #{@all.size} test results " +
+ "but got #{counted} (#{@testsPassed.size}, #{@testsFailed.size}, #{@noResult.size})")
+ raise "Internal error in #{File.basename($0)}"
+ end
+ end
end
class ResultsWriter
@@ -128,8 +151,9 @@
def appendNoResult(plan)
append("noresult", plan.name)
end
- def appendFlaky(plan, successes, tries)
- append("flaky", "#{plan.name} #{successes} #{tries}")
+ def appendFlaky(plan, successes, tries, successful)
+ successful = successful ? 1 : 0
+ append("flaky", "#{plan.name} #{successes} #{tries} #{successful}")
end
def appendResult(plan, didPass)
append("results", "#{plan.name}: #{didPass ? 'PASS' : 'FAIL'}")
@@ -143,7 +167,7 @@
end
# Final accounting of results.
-class TestResultEvaluatorFinal < TestResultEvaluator
+class TestResultEvaluatorFinal < TestResultEvaluatorSimple
attr_reader :noresult, :familyMap
def initialize(runlist, statusMap)
super(runlist, statusMap)
@@ -152,21 +176,24 @@
@writer = ResultsWriter.new
end
def noResult(plan)
+ super(plan)
@writer.appendNoResult(plan)
@noresult += 1
end
def passed(plan)
+ super(plan)
@writer.appendPass(plan)
appendResult(plan, true)
addToFamilyMap(plan, "PASS")
end
def failed(plan)
+ super(plan)
@writer.appendFailure(plan)
appendResult(plan, false)
addToFamilyMap(plan, "FAIL")
end
- def wasFlakyAndPassed(plan, successes, tries)
- @writer.appendFlaky(plan, successes, tries)
+ def wasFlaky(plan, successes, tries, successful)
+ @writer.appendFlaky(plan, successes, tries, successful)
end
private
def appendResult(plan, successful)