Title: [292775] trunk/Tools
Revision
292775
Author
[email protected]
Date
2022-04-12 10:04:48 -0700 (Tue, 12 Apr 2022)

Log Message

[JSC] Report test flakiness to resultsdb
https://bugs.webkit.org/show_bug.cgi?id=238806

Reviewed by Jonathan Bedard.

Include flakiness info in the report sent to results.webkit.org.
This is a prerequisite to using --treat-failing-as-flaky in
post-commit JSC bots, as otherwise we'd lose any flakiness info
for tests that made it over the specified threshold.

For completeness, this patch includes the flakiness information
for both passed and failing tests.

It also bumps the hard iteration limit to 50 (a cap to the value
specified in --treat-failing-as-flaky). While here, add some
self-checking code that verifies the numbers add up. This needs
some minor refactoring in the tests to not declare an error when
the results are incomplete on purpose.

* Scripts/run-_javascript_core-tests:
(runJSCStressTests):
Propagate the flakiness information reported by run-jsc-stress
tests. Report known flakiness for both failing and passing tests.
* Scripts/run-jsc-stress-tests:
Bump iterationsCeiling. Ask the evaluator to
treatIncompleteAsFailed and perform validition.
* Scripts/webkitruby/jsc-stress-test/executor.rb:
Introduce and use acceptIncomplete for the test that needs it.
* Scripts/webkitruby/jsc-stress-test/test-result-evaluator.rb:
Introduce and use treatIncompleteAsFailed. Report both passed and
failed tests as flaky when they have been (subsequent to user
request) executed multiple times.

Modified Paths

Diff

Modified: trunk/Tools/ChangeLog (292774 => 292775)


--- trunk/Tools/ChangeLog	2022-04-12 17:02:46 UTC (rev 292774)
+++ trunk/Tools/ChangeLog	2022-04-12 17:04:48 UTC (rev 292775)
@@ -1,3 +1,38 @@
+2022-04-12  Angelos Oikonomopoulos  <[email protected]>
+
+        [JSC] Report test flakiness to resultsdb
+        https://bugs.webkit.org/show_bug.cgi?id=238806
+
+        Reviewed by Jonathan Bedard.
+
+        Include flakiness info in the report sent to results.webkit.org.
+        This is a prerequisite to using --treat-failing-as-flaky in
+        post-commit JSC bots, as otherwise we'd lose any flakiness info
+        for tests that made it over the specified threshold.
+
+        For completeness, this patch includes the flakiness information
+        for both passed and failing tests.
+
+        It also bumps the hard iteration limit to 50 (a cap to the value
+        specified in --treat-failing-as-flaky). While here, add some
+        self-checking code that verifies the numbers add up. This needs
+        some minor refactoring in the tests to not declare an error when
+        the results are incomplete on purpose.
+
+        * Scripts/run-_javascript_core-tests:
+        (runJSCStressTests):
+        Propagate the flakiness information reported by run-jsc-stress
+        tests. Report known flakiness for both failing and passing tests.
+        * Scripts/run-jsc-stress-tests:
+        Bump iterationsCeiling. Ask the evaluator to
+        treatIncompleteAsFailed and perform validition.
+        * Scripts/webkitruby/jsc-stress-test/executor.rb:
+        Introduce and use acceptIncomplete for the test that needs it.
+        * Scripts/webkitruby/jsc-stress-test/test-result-evaluator.rb:
+        Introduce and use treatIncompleteAsFailed. Report both passed and
+        failed tests as flaky when they have been (subsequent to user
+        request) executed multiple times.
+
 2022-04-12  Youenn Fablet  <[email protected]>
 
         Add persistent notification click API test

Modified: trunk/Tools/Scripts/run-_javascript_core-tests (292774 => 292775)


--- trunk/Tools/Scripts/run-_javascript_core-tests	2022-04-12 17:02:46 UTC (rev 292774)
+++ trunk/Tools/Scripts/run-_javascript_core-tests	2022-04-12 17:04:48 UTC (rev 292775)
@@ -40,6 +40,7 @@
 use List::Util qw(min max);
 use POSIX;
 use webkitdirs;
+use Scalar::Util qw(looks_like_number);
 use Text::ParseWords;
 
 # determine configuration
@@ -946,21 +947,6 @@
         $reportData{$testSucceeded} = {actual => "PASS"};
     }
 
-    my @jscStressFlaky = readAllLines($jscStressResultsDir . "/flaky");
-    @jscStressFlaky = sort @jscStressFlaky;
-    my $numJSCStressFlaky = @jscStressFlaky;
-
-    my %jscStressFlakyInfo = ();
-    if ($numJSCStressFlaky) {
-        print "\n** The following JSC stress tests were flaky:\n";
-        foreach my $testFlaky (@jscStressFlaky) {
-            my @fields = split / /, $testFlaky;
-            print "\t$fields[0] $fields[1]/$fields[2] passes\n";
-            $jscStressFlakyInfo{$fields[0]} = { 'P' => $fields[1], 'T' => $fields[2]}
-        }
-        print "\n";
-    }
-
     my @jscStressFailList = readAllLines($jscStressResultsDir . "/failed");
     @jscStressFailList = sort @jscStressFailList;
     my $numJSCStressFailures = @jscStressFailList;
@@ -975,6 +961,32 @@
     }
     print "\n";
 
+    my @jscStressFlaky = readAllLines($jscStressResultsDir . "/flaky");
+    @jscStressFlaky = sort @jscStressFlaky;
+    my $numJSCStressFlaky = @jscStressFlaky;
+
+    my %jscStressFlakyInfo = ();
+
+    my $numJSCStressFlakyButPassed = 0;
+    if ($numJSCStressFlaky) {
+        print "\n** The following JSC stress tests were flaky:\n";
+        foreach my $testFlaky (@jscStressFlaky) {
+            my @fields = split / /, $testFlaky;
+            my $testName = $fields[0];
+            my $numPasses = $fields[1];
+            my $numTotal = $fields[2];
+            looks_like_number($fields[3]) || die "Malformed line: $testFlaky";
+            my $successful = int($fields[3]);
+            my $passFail = $successful ? "passed" : "failed";
+            print "\t$testName $fields[1]/$fields[2] passes, $passFail\n";
+            $jscStressFlakyInfo{$testName} = { 'P' => $numPasses, 'T' => $numTotal, 'S' => $successful };
+            # NB: the dict has been created above, we're only adding the flakiness field.
+            $reportData{$testName}{"flakiness_num_passes"} = int($numPasses);
+            $reportData{$testName}{"flakiness_num_tries"} =  int($numTotal);
+        }
+        print "\n";
+    }
+
     my @jscStressNoResultList = readAllLines($jscStressResultsDir . "/noresult");
     my $numJSCStressNoResultTests = @jscStressNoResultList;
 
@@ -987,7 +999,8 @@
 
     print "Results for JSC stress tests:\n";
     printThingsFound($numJSCStressFailures, "failure", "failures", "found");
-    printThingsFound($numJSCStressFlaky, "test", "tests", "flaky but passed");
+    printThingsFound($numJSCStressFlakyButPassed, "test", "tests", "flaky but passed");
+    printThingsFound($numJSCStressFlaky - $numJSCStressFlakyButPassed, "test", "tests", "flaky and failed");
     printThingsFound($numJSCStressNoResultTests, "test", "tests", "failed to complete");
     print "    OK.\n" if $numJSCStressFailures == 0 and $numJSCStressNoResultTests == 0;
 
@@ -994,7 +1007,7 @@
     print "\n";
 
     if (defined($jsonFileName)) {
-        $jsonData{'flakyAndPassed'} =  \%jscStressFlakyInfo;
+        $jsonData{'flaky'} =  \%jscStressFlakyInfo;
         $jsonData{'stressTestFailures'} = \@jscStressFailList;
     }
 

Modified: trunk/Tools/Scripts/run-jsc-stress-tests (292774 => 292775)


--- trunk/Tools/Scripts/run-jsc-stress-tests	2022-04-12 17:02:46 UTC (rev 292774)
+++ trunk/Tools/Scripts/run-jsc-stress-tests	2022-04-12 17:04:48 UTC (rev 292775)
@@ -84,7 +84,7 @@
 PARALLEL_REMOTE_WRAPPER_MARK_END = "a9aea5c3b843"
 PARALLEL_REMOTE_STATE_LOST_MARKER = "709fb7a77c45231918eb118a"
 ITERATION_LIMITS = OpenStruct.new(:infraIterationsFloor => 3,
-                                  :iterationsCeiling => 10)
+                                  :iterationsCeiling => 50)
 
 REMOTE_TIMEOUT = 120
 SSH_OPTIONS_DEFAULT = [
@@ -2813,7 +2813,8 @@
     end
 
     evaluator = TestResultEvaluatorFinal.new($runlist, statusMap)
-    evaluator.visit!
+    evaluator.visit!({:treatIncompleteAsFailed => true})
+    evaluator.validate
 
     if evaluator.noresult > 0
         $stderr.puts("Could not get the exit status for #{evaluator.noresult} tests")

Modified: trunk/Tools/Scripts/webkitruby/jsc-stress-test/executor.rb (292774 => 292775)


--- trunk/Tools/Scripts/webkitruby/jsc-stress-test/executor.rb	2022-04-12 17:02:46 UTC (rev 292774)
+++ trunk/Tools/Scripts/webkitruby/jsc-stress-test/executor.rb	2022-04-12 17:04:48 UTC (rev 292775)
@@ -169,10 +169,18 @@
 end
 
 module ExecutorSelfTests
+    class TestCase
+        attr_reader :testsWithFlaky, :iterations, :expectedTestResults, :opts
+        def initialize(testsWithFlaky, iterations, expectedTestResults, opts={})
+            @testsWithFlaky = testsWithFlaky
+            @iterations = iterations
+            @expectedTestResults = expectedTestResults
+            @opts = opts
+        end
+    end
     FinalTestResult = Struct.new(:result)
     TestResult = Struct.new(:results)
     Iteration = Struct.new(:testResults)
-    TestCase = Struct.new(:testsWithFlaky, :iterations, :expectedTestResults, :treatFailingAsFlaky)
     MOCK_TEST_FAMILY = "f"
     ITERATION_LIMITS = OpenStruct.new(:infraIterationsFloor => 3,
                                       :iterationsCeiling => 8)
@@ -302,9 +310,11 @@
                              Iteration.new([tre, trp]),
                          ],
                          [ftrp, ftrp],
-                         OpenStruct.new(:passPercentage => 0.4,
-                                        :maxTries => 6,
-                                        :maxFailing => 3)),
+                         {
+                             :treatFailingAsFlaky => OpenStruct.new(:passPercentage => 0.4,
+                                                                  :maxTries => 6,
+                                                                  :maxFailing => 3)
+                         }),
             # Same, with different variability.
             TestCase.new([nil, nil],
                          [
@@ -317,9 +327,11 @@
                              Iteration.new([tre, trf]),
                          ],
                          [ftrp, ftrf],
-                         OpenStruct.new(:passPercentage => 0.4,
-                                        :maxTries => 6,
-                                        :maxFailing => 3)),
+                         {
+                             :treatFailingAsFlaky => OpenStruct.new(:passPercentage => 0.4,
+                                                                    :maxTries => 6,
+                                                                    :maxFailing => 3)
+                         }),
             # Test that a flaky for which not enough results come in is not marked as completed.
             TestCase.new([nil],
                          [
@@ -332,9 +344,12 @@
                              Iteration.new([tre]),
                          ],
                          [ftre],
-                         OpenStruct.new(:passPercentage => 0.49,
-                                        :maxTries => 4,
-                                        :maxFailing => 1)),
+                         {
+                             :treatFailingAsFlaky => OpenStruct.new(:passPercentage => 0.49,
+                                                                    :maxTries => 4,
+                                                                    :maxFailing => 1),
+                             :acceptIncomplete => true,
+                         }),
             # Test that (statically) flaky tests aren't adjusted
             TestCase.new([RetryParameters.new(0.5, 3), nil],
                          [
@@ -343,9 +358,11 @@
                              Iteration.new([trf, tre]),
                          ],
                          [ftrf, ftrp],
-                         OpenStruct.new(:passPercentage => 0.7, # > 2/3
-                                        :maxTries => 6,
-                                        :maxFailing => 3)),
+                         {
+                             :treatFailingAsFlaky => OpenStruct.new(:passPercentage => 0.7, # > 2/3
+                                                                    :maxTries => 6,
+                                                                    :maxFailing => 3)
+                         }),
             # Test that we respect maxFailing.
             TestCase.new([nil, nil, nil],
                          [
@@ -352,9 +369,11 @@
                              Iteration.new([trf, trf, trf]),
                          ],
                          [ftrf, ftrf, ftrf],
-                         OpenStruct.new(:passPercentage => 0.7, # > 2/3
-                                        :maxTries => 6,
-                                        :maxFailing => 2)),
+                         {
+                             :treatFailingAsFlaky => OpenStruct.new(:passPercentage => 0.7, # > 2/3
+                                                                    :maxTries => 6,
+                                                                    :maxFailing => 2)
+                         }),
             # Test that we halt at maxIterations (3 + 5 = 8) if no results come in.
             TestCase.new([RetryParameters.new(0.5, 5), nil],
                          [
@@ -378,7 +397,7 @@
             }
             putd("runlist: #{runlist.collect { |p| p.to_s }}, expected: #{testcase.expectedTestResults}")
             executor = MockTestsExecutor.new(runlist, testcase.iterations,
-                                             ITERATION_LIMITS, testcase.treatFailingAsFlaky,
+                                             ITERATION_LIMITS, testcase.opts[:treatFailingAsFlaky],
                                              $testsDebugStream)
             statusMap = executor.loop
             if executor.executedIterations != testcase.iterations.size
@@ -392,7 +411,9 @@
 
             evaluator = TestResultEvaluatorSimple.new(runlist, statusMap)
             evaluator.visit!
-
+            if not testcase.opts[:acceptIncomplete]
+                evaluator.validate
+            end
             verifyCompletedTestsMatchExpected(statusMap, evaluator.completed, expectCompleted)
 
             testcaseFailed = false

Modified: trunk/Tools/Scripts/webkitruby/jsc-stress-test/test-result-evaluator.rb (292774 => 292775)


--- trunk/Tools/Scripts/webkitruby/jsc-stress-test/test-result-evaluator.rb	2022-04-12 17:02:46 UTC (rev 292774)
+++ trunk/Tools/Scripts/webkitruby/jsc-stress-test/test-result-evaluator.rb	2022-04-12 17:04:48 UTC (rev 292775)
@@ -35,11 +35,13 @@
     end
     def failed(plan)
     end
-    def wasFlakyAndPassed(plan, successes, tries)
+    def wasFlaky(plan, successes, tries, successful)
     end
     def visiting(plan)
     end
-    def visit!
+    def validate
+    end
+    def visit!(opts={})
         @runlist.each {
             |plan|
             visiting(plan)
@@ -59,8 +61,14 @@
             else
                 testWasSuccessful = plan.retryParameters.result(statuses)
                 if testWasSuccessful.nil?
-                    # Not completed yet
-                    next
+                    if opts[:treatIncompleteAsFailed]
+                        # Maximum number of iterations reached without a
+                        # conclusive result; draw the developer's attention.
+                        testWasSuccessful = false
+                    else
+                        # Not completed yet
+                        next
+                    end
                 end
             end
             if testWasSuccessful
@@ -72,8 +80,15 @@
             if nresults < 1
                 $stderr.puts("Unexpected number of results: #{nresults} for plan #{plan}")
                 raise "Unexpected number of results"
-            elsif nresults != 1 and testWasSuccessful
-                wasFlakyAndPassed(plan, statuses.count(STATUS_FILE_PASS), statuses.size)
+            elsif nresults != 1 and not plan.retryParameters.nil?
+                # The flakiness stats are about diagnosing code issues, so only
+                # record flakiness when we've intentionally retried the test.
+                # Otherwise, multiple results might have come in because of
+                # transient infrastructure failures causing the test to be
+                # rescheduled on another host too. But then any failures might
+                # be because the machine was under load (causing the test to hit
+                # a timeout) or OOM'd.
+                wasFlaky(plan, statuses.count(STATUS_FILE_PASS), statuses.size, testWasSuccessful)
             end
         }
     end
@@ -107,6 +122,14 @@
     def missing
         @noResult
     end
+    def validate
+        counted = @testsPassed.size + @testsFailed.size + @noResult.size
+        if counted != @all.size
+            $stderr.puts("Accounting mismatch: expected #{@all.size} test results " +
+                         "but got #{counted} (#{@testsPassed.size}, #{@testsFailed.size}, #{@noResult.size})")
+            raise "Internal error in #{File.basename($0)}"
+        end
+    end
 end
 
 class ResultsWriter
@@ -128,8 +151,9 @@
     def appendNoResult(plan)
         append("noresult", plan.name)
     end
-    def appendFlaky(plan, successes, tries)
-        append("flaky", "#{plan.name} #{successes} #{tries}")
+    def appendFlaky(plan, successes, tries, successful)
+        successful = successful ? 1 : 0
+        append("flaky", "#{plan.name} #{successes} #{tries} #{successful}")
     end
     def appendResult(plan, didPass)
         append("results", "#{plan.name}: #{didPass ? 'PASS' : 'FAIL'}")
@@ -143,7 +167,7 @@
 end
 
 # Final accounting of results.
-class TestResultEvaluatorFinal < TestResultEvaluator
+class TestResultEvaluatorFinal < TestResultEvaluatorSimple
     attr_reader :noresult, :familyMap
     def initialize(runlist, statusMap)
         super(runlist, statusMap)
@@ -152,21 +176,24 @@
         @writer = ResultsWriter.new
     end
     def noResult(plan)
+        super(plan)
         @writer.appendNoResult(plan)
         @noresult += 1
     end
     def passed(plan)
+        super(plan)
         @writer.appendPass(plan)
         appendResult(plan, true)
         addToFamilyMap(plan, "PASS")
     end
     def failed(plan)
+        super(plan)
         @writer.appendFailure(plan)
         appendResult(plan, false)
         addToFamilyMap(plan, "FAIL")
     end
-    def wasFlakyAndPassed(plan, successes, tries)
-        @writer.appendFlaky(plan, successes, tries)
+    def wasFlaky(plan, successes, tries, successful)
+        @writer.appendFlaky(plan, successes, tries, successful)
     end
     private
     def appendResult(plan, successful)
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to