Title: [284784] trunk/Tools
Revision
284784
Author
clo...@igalia.com
Date
2021-10-25 08:17:47 -0700 (Mon, 25 Oct 2021)

Log Message

[webkitpy] webkit-test-runner doesn't report all results when a test is run several times
https://bugs.webkit.org/show_bug.cgi?id=231790

Reviewed by Jonathan Bedard.

WTR was only picking one of the failure results when a test was run more than once
(for example with the flag --repeat-each=X), so it was not reporting all the values
that the test generated. This is a major issue when searching for flaky tests.

This patch adds a dictionary to store all the results of a given test across its repeated
runs, and then ensures those values are taken into account when reporting the
final results for the test. It marks the test as flaky if more than one distinct value
was generated.

* Scripts/webkitpy/layout_tests/models/test_run_results.py:
(TestRunResults.__init__):
(TestRunResults.add):
(TestRunResults.merge):
(summarize_results):

Modified Paths

Diff

Modified: trunk/Tools/ChangeLog (284783 => 284784)


--- trunk/Tools/ChangeLog	2021-10-25 14:36:41 UTC (rev 284783)
+++ trunk/Tools/ChangeLog	2021-10-25 15:17:47 UTC (rev 284784)
@@ -1,3 +1,25 @@
+2021-10-25  Carlos Alberto Lopez Perez  <clo...@igalia.com>
+
+        [webkitpy] webkit-test-runner doesn't report all results when a test is run several times
+        https://bugs.webkit.org/show_bug.cgi?id=231790
+
+        Reviewed by Jonathan Bedard.
+
+        WTR was only picking one of the failure results when a test was run more than once
+        (for example with the flag --repeat-each=X), so it was not reporting all the values
+        that the test generated. This is a major issue when searching for flaky tests.
+
+        This patch adds a dictionary to store all the results of a given test across its repeated
+        runs, and then ensures those values are taken into account when reporting the
+        final results for the test. It marks the test as flaky if more than one distinct value
+        was generated.
+
+        * Scripts/webkitpy/layout_tests/models/test_run_results.py:
+        (TestRunResults.__init__):
+        (TestRunResults.add):
+        (TestRunResults.merge):
+        (summarize_results):
+
 2021-10-25  Aakash Jain  <aakash_j...@apple.com>
 
         Add support for fast-cq mode to webkit-patch land-safely command

Modified: trunk/Tools/Scripts/webkitpy/layout_tests/models/test_run_results.py (284783 => 284784)


--- trunk/Tools/Scripts/webkitpy/layout_tests/models/test_run_results.py	2021-10-25 14:36:41 UTC (rev 284783)
+++ trunk/Tools/Scripts/webkitpy/layout_tests/models/test_run_results.py	2021-10-25 15:17:47 UTC (rev 284784)
@@ -58,6 +58,7 @@
         self.expected_results_by_name = {}
         self.unexpected_results_by_name = {}
         self.failures_by_name = {}
+        self.repeated_results_by_name = {}  # Map of test name to the set of result types seen when a test is run more than once (like when passing --repeat-each)
         self.total_failures = 0
         self.expected_skips = 0
         for expectation in test_expectations.TestExpectations.EXPECTATIONS.values():
@@ -70,6 +71,9 @@
 
     def add(self, test_result, expected):
         self.tests_by_expectation[test_result.type].add(test_result.test_name)
+        if test_result.test_name not in self.repeated_results_by_name:
+            self.repeated_results_by_name[test_result.test_name] = set()
+        self.repeated_results_by_name[test_result.test_name].add(test_result.type)
         self.results_by_name[test_result.test_name] = self.results_by_name.get(test_result.test_name, test_result)
         if test_result.is_other_crash:
             return
@@ -158,6 +162,7 @@
         self.unexpected_timeouts += test_run_results.unexpected_timeouts
         self.tests_by_expectation = merge_dict_sets(self.tests_by_expectation, test_run_results.tests_by_expectation)
         self.tests_by_timeline = merge_dict_sets(self.tests_by_timeline, test_run_results.tests_by_timeline)
+        self.repeated_results_by_name = merge_dict_sets(self.repeated_results_by_name, test_run_results.repeated_results_by_name)
         self.results_by_name.update(test_run_results.results_by_name)
         self.all_results += test_run_results.all_results
         self.expected_results_by_name.update(test_run_results.expected_results_by_name)
@@ -315,7 +320,9 @@
                     num_regressions += 1
                     test_dict['report'] = 'REGRESSION'
             elif retry_results and test_name in retry_results.expected_results_by_name:
-                actual.append(keywords[retry_results.expected_results_by_name[test_name].type])
+                retry_result_name = keywords[retry_results.expected_results_by_name[test_name].type]
+                if retry_result_name not in actual:
+                    actual.append(retry_result_name)
                 num_flaky += 1
                 test_dict['report'] = 'FLAKY'
             else:
@@ -322,6 +329,15 @@
                 num_regressions += 1
                 test_dict['report'] = 'REGRESSION'
 
+        # If a test was run more than once on the initial_results (for example with --repeat-each),
+        # check for possible flakiness there and also account in "actual" for the extra results.
+        for repeated_result in initial_results.repeated_results_by_name[test_name]:
+            repeated_result_name = keywords[repeated_result]
+            if repeated_result_name not in actual:
+                actual.append(repeated_result_name)
+                if test_name in initial_results.unexpected_results_by_name:
+                    test_dict['report'] = 'FLAKY'
+
         test_dict['expected'] = expected
         test_dict['actual'] = " ".join(actual)
         if include_time_and_modifiers:
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to