Title: [175367] trunk/Tools
Revision: 175367
Author: [email protected]
Date: 2014-10-29 22:02:33 -0700 (Wed, 29 Oct 2014)

Log Message

CommitQueue and EWS should reject any patches that result in consistent test
failures that aren't present on the tree.
https://bugs.webkit.org/show_bug.cgi?id=138184

Patch by Jake Nielsen <[email protected]> on 2014-10-29
Reviewed by Alexey Proskuryakov.

* Scripts/webkitpy/layout_tests/models/test_results.py:
Adds a simple hashing function to allow for set operations to handle
TestResult objects properly.
(TestResult.__hash__):
* Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py:
Adds one unit test, and modifies others to reflect that patches which
introduce new test failures (but also show flakiness) should be
rejected rather than left to spin.
(MockCommitQueue.report_flaky_tests):
(CommitQueueTaskTest._run_and_expect_patch_analysis_result):
(test_double_flaky_test_failure):
(test_two_flaky_tests):
(test_very_flaky_patch):
(test_very_flaky_patch_with_some_tree_redness):
(test_different_test_failures):
(test_different_test_failures_with_some_tree_redness):
(test_different_test_failures_with_some_tree_redness_and_some_fixes):
(test_mildly_flaky_patch):
(test_mildly_flaky_patch_with_some_tree_redness):
* Scripts/webkitpy/tool/bot/patchanalysistask.py:
Makes PatchAnalysisTask reject said patches.
(PatchAnalysisTask._test_patch):

Modified Paths

trunk/Tools/ChangeLog
trunk/Tools/Scripts/webkitpy/layout_tests/models/test_results.py
trunk/Tools/Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py
trunk/Tools/Scripts/webkitpy/tool/bot/patchanalysistask.py

Diff

Modified: trunk/Tools/ChangeLog (175366 => 175367)


--- trunk/Tools/ChangeLog	2014-10-30 04:53:44 UTC (rev 175366)
+++ trunk/Tools/ChangeLog	2014-10-30 05:02:33 UTC (rev 175367)
@@ -1,3 +1,34 @@
+2014-10-29  Jake Nielsen  <[email protected]>
+
+        CommitQueue and EWS should reject any patches that result in consistent test
+        failures that aren't present on the tree.
+        https://bugs.webkit.org/show_bug.cgi?id=138184
+
+        Reviewed by Alexey Proskuryakov.
+
+        * Scripts/webkitpy/layout_tests/models/test_results.py:
+        Adds a simple hashing function to allow for set operations to handle
+        TestResult objects properly.
+        (TestResult.__hash__):
+        * Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py:
+        Adds one unit test, and modifies others to reflect that patches
+        which introduce new test failures (but also show flakiness)
+        should be rejected rather than left to spin.
+        (MockCommitQueue.report_flaky_tests):
+        (CommitQueueTaskTest._run_and_expect_patch_analysis_result):
+        (test_double_flaky_test_failure):
+        (test_two_flaky_tests):
+        (test_very_flaky_patch):
+        (test_very_flaky_patch_with_some_tree_redness):
+        (test_different_test_failures):
+        (test_different_test_failures_with_some_tree_redness):
+        (test_different_test_failures_with_some_tree_redness_and_some_fixes):
+        (test_mildly_flaky_patch):
+        (test_mildly_flaky_patch_with_some_tree_redness):
+        * Scripts/webkitpy/tool/bot/patchanalysistask.py:
+        Makes PatchAnalysisTask reject said patches.
+        (PatchAnalysisTask._test_patch):
+
 2014-10-29  Youenn Fablet  <[email protected]>
 
         WinCairoRequirements.zip cannot be downloaded from dropbox

Modified: trunk/Tools/Scripts/webkitpy/layout_tests/models/test_results.py (175366 => 175367)


--- trunk/Tools/Scripts/webkitpy/layout_tests/models/test_results.py	2014-10-30 04:53:44 UTC (rev 175366)
+++ trunk/Tools/Scripts/webkitpy/layout_tests/models/test_results.py	2014-10-30 05:02:33 UTC (rev 175367)
@@ -64,6 +64,9 @@
     def __ne__(self, other):
         return not (self == other)
 
+    def __hash__(self):
+        return self.test_name.__hash__()
+
     def has_failure_matching_types(self, *failure_classes):
         for failure in self.failures:
             if type(failure) in failure_classes:
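
(For illustration, here is a minimal self-contained sketch, not the real
module, of why TestResult needs __hash__: with both __eq__ and __hash__
defined, failing results from two runs can be held in frozensets and
compared with set operations. The constructor signature and the __eq__
body shown here are assumptions made for this example only.)

    class TestResult(object):
        def __init__(self, test_name, failures=None):
            self.test_name = test_name
            self.failures = failures or []

        def __eq__(self, other):
            return (self.test_name == other.test_name
                    and self.failures == other.failures)

        def __ne__(self, other):
            return not (self == other)

        def __hash__(self):
            # Hash on test_name only; __eq__ still disambiguates results
            # whose failures differ, which is all set semantics require.
            return self.test_name.__hash__()

    first_run = frozenset([TestResult("Fail1"), TestResult("Fail2")])
    second_run = frozenset([TestResult("Fail2"), TestResult("Fail3")])

    only_first = first_run - second_run   # {Fail1}: failed once -> flaky
    consistent = first_run & second_run   # {Fail2}: failed both times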

Modified: trunk/Tools/Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py (175366 => 175367)


--- trunk/Tools/Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py	2014-10-30 04:53:44 UTC (rev 175366)
+++ trunk/Tools/Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py	2014-10-30 05:02:33 UTC (rev 175367)
@@ -73,8 +73,9 @@
         return LayoutTestResults(test_results=[], did_exceed_test_failure_limit=False)
 
     def report_flaky_tests(self, patch, flaky_results, results_archive):
-        self._flaky_tests = [result.test_name for result in flaky_results]
-        _log.info("report_flaky_tests: patch='%s' flaky_tests='%s' archive='%s'" % (patch.id(), self._flaky_tests, results_archive.filename))
+        current_flaky_tests = [result.test_name for result in flaky_results]
+        self._flaky_tests += current_flaky_tests
+        _log.info("report_flaky_tests: patch='%s' flaky_tests='%s' archive='%s'" % (patch.id(), current_flaky_tests, results_archive.filename))
 
     def get_reported_flaky_tests(self):
         return self._flaky_tests
@@ -174,7 +175,7 @@
             analysis_result = PatchAnalysisResult.FAIL
 
         self.assertEqual(analysis_result, expected_analysis_result)
-        self.assertEqual(commit_queue.get_reported_flaky_tests(), expected_reported_flaky_tests)
+        self.assertEqual(frozenset(commit_queue.get_reported_flaky_tests()), frozenset(expected_reported_flaky_tests))
         self.assertEqual(commit_queue.did_run_clean_tests(), expect_clean_tests_to_run)
 
         # The failure status only means anything if we actually failed.
@@ -378,7 +379,7 @@
         # The (subtle) point of this test is that report_flaky_tests does not get
         # called for this run.
         # Note also that there is no attempt to run the tests w/o the patch.
-        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expected_reported_flaky_tests=["Fail1", "Fail2"])
 
     def test_test_failure(self):
         commit_queue = MockSimpleTestPlanCommitQueue(
@@ -421,7 +422,7 @@
             clean_test_failures=["Fail1", "Fail2"])
 
         # FIXME: This should pass, but as of right now, it defers.
-        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expected_reported_flaky_tests=["Fail1", "Fail2"])
 
     def test_one_flaky_test(self):
         commit_queue = MockSimpleTestPlanCommitQueue(
@@ -438,16 +439,16 @@
             clean_test_failures=[])
 
         # FIXME: This should actually fail, but right now it defers
-        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expected_reported_flaky_tests=["Fail1", "Fail2", "Fail3", "Fail4", "Fail5", "Fail6", "Fail7", "Fail8", "Fail9", "Fail10"])
 
     def test_very_flaky_patch_with_some_tree_redness(self):
         commit_queue = MockSimpleTestPlanCommitQueue(
-            first_test_failures=["PreExistingFail1", "PreExistingFail2", "Fail6", "Fail7", "Fail8", "Fail9", "Fail10"],
-            second_test_failures=["PreExistingFail1", "PreExistingFail2", "Fail1", "Fail2", "Fail3", "Fail4", "Fail5"],
+            first_test_failures=["PreExistingFail1", "PreExistingFail2", "Fail1", "Fail2", "Fail3", "Fail4", "Fail5"],
+            second_test_failures=["PreExistingFail1", "PreExistingFail2", "Fail6", "Fail7", "Fail8", "Fail9", "Fail10"],
             clean_test_failures=["PreExistingFail1", "PreExistingFail2"])
 
         # FIXME: This should actually fail, but right now it defers
-        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True, expected_reported_flaky_tests=["Fail1", "Fail2", "Fail3", "Fail4", "Fail5", "Fail6", "Fail7", "Fail8", "Fail9", "Fail10"])
 
     def test_different_test_failures(self):
         commit_queue = MockSimpleTestPlanCommitQueue(
@@ -455,8 +456,7 @@
             second_test_failures=["Fail1", "Fail2", "Fail3", "Fail4", "Fail5"],
             clean_test_failures=[])
 
-        # FIXME: This should actually fail, but right now it defers
-        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.FAIL, expect_clean_tests_to_run=True, expected_reported_flaky_tests=["Fail6"], expected_failure_status_id=1)
 
     def test_different_test_failures_with_some_tree_redness(self):
         commit_queue = MockSimpleTestPlanCommitQueue(
@@ -464,16 +464,23 @@
             second_test_failures=["PreExistingFail1", "PreExistingFail2", "Fail1", "Fail2", "Fail3", "Fail4", "Fail5"],
             clean_test_failures=["PreExistingFail1", "PreExistingFail2"])
 
-        # FIXME: This should actually fail, but right now it defers
-        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.FAIL, expect_clean_tests_to_run=True, expected_reported_flaky_tests=["Fail6"], expected_failure_status_id=1)
 
+    def test_different_test_failures_with_some_tree_redness_and_some_fixes(self):
+        commit_queue = MockSimpleTestPlanCommitQueue(
+            first_test_failures=["PreExistingFail1", "Fail1", "Fail2", "Fail3", "Fail4", "Fail5", "Fail6"],
+            second_test_failures=["PreExistingFail1", "Fail1", "Fail2", "Fail3", "Fail4", "Fail5"],
+            clean_test_failures=["PreExistingFail1", "PreExistingFail2"])
+
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.FAIL, expect_clean_tests_to_run=True, expected_reported_flaky_tests=["Fail6"], expected_failure_status_id=1)
+
     def test_mildly_flaky_patch(self):
         commit_queue = MockSimpleTestPlanCommitQueue(
             first_test_failures=["Fail1"],
             second_test_failures=["Fail2"],
             clean_test_failures=[])
 
-        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=False, expected_reported_flaky_tests=["Fail1", "Fail2"])
 
     def test_mildly_flaky_patch_with_some_tree_redness(self):
         commit_queue = MockSimpleTestPlanCommitQueue(
@@ -481,7 +488,7 @@
             second_test_failures=["PreExistingFail1", "PreExistingFail2", "Fail2"],
             clean_test_failures=["PreExistingFail1", "PreExistingFail2"])
 
-        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+        self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True, expected_reported_flaky_tests=["Fail1", "Fail2"])
 
     def test_tree_more_red_than_patch(self):
         commit_queue = MockSimpleTestPlanCommitQueue(

Modified: trunk/Tools/Scripts/webkitpy/tool/bot/patchanalysistask.py (175366 => 175367)


--- trunk/Tools/Scripts/webkitpy/tool/bot/patchanalysistask.py	2014-10-30 04:53:44 UTC (rev 175366)
+++ trunk/Tools/Scripts/webkitpy/tool/bot/patchanalysistask.py	2014-10-30 05:02:33 UTC (rev 175367)
@@ -225,11 +225,28 @@
             return self._continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try(second_results, second_results_archive, second_script_error)
 
         if self._results_failed_different_tests(first_results, second_results):
-            # We could report flaky tests here, but we would need to be careful
-            # to use similar checks to ExpectedFailures._can_trust_results
-            # to make sure we don't report constant failures as flakes when
-            # we happen to hit the --exit-after-N-failures limit.
-            # See https://bugs.webkit.org/show_bug.cgi?id=51272
+            first_failing_results_set = frozenset(first_results.failing_test_results())
+            second_failing_results_set = frozenset(second_results.failing_test_results())
+
+            tests_that_only_failed_first = first_failing_results_set.difference(second_failing_results_set)
+            self._report_flaky_tests(tests_that_only_failed_first, first_results_archive)
+
+            tests_that_only_failed_second = second_failing_results_set.difference(first_failing_results_set)
+            self._report_flaky_tests(tests_that_only_failed_second, second_results_archive)
+
+            tests_that_consistently_failed = first_failing_results_set.intersection(second_failing_results_set)
+            if tests_that_consistently_failed:
+                self._build_and_test_without_patch()
+                self._clean_tree_results = self._delegate.test_results()
+                tests_that_failed_on_tree = self._clean_tree_results.failing_test_results()
+
+                new_failures_introduced_by_patch = tests_that_consistently_failed.difference(set(tests_that_failed_on_tree))
+                if new_failures_introduced_by_patch:
+                    self.failure_status_id = first_failure_status_id
+                    return self.report_failure(first_results_archive, new_failures_introduced_by_patch, first_script_error)
+
+            # At this point we know that the patch exhibits flakiness, but no
+            # consistent failures were introduced. This is a bit of a grey area.
             return False
 
         if self._build_and_test_without_patch():
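
(To summarize the control flow of the hunk above, here is a condensed,
hypothetical sketch of the new decision logic. classify, run_clean_tree,
and the returned tuples are stand-ins invented for this example; they are
not the real PatchAnalysisTask API.)

    def classify(first_failures, second_failures, run_clean_tree):
        """first_failures/second_failures are frozensets of results from the
        two test runs with the patch applied; run_clean_tree runs the tests
        without the patch and returns the failing results."""
        # Tests that failed in only one of the two runs are flaky.
        flaky = ((first_failures - second_failures)
                 | (second_failures - first_failures))

        # Tests that failed in both runs might be the patch's fault, but
        # only if a clean-tree run does not also fail them.
        consistent = first_failures & second_failures
        if consistent:
            tree_failures = frozenset(run_clean_tree())
            new_failures = consistent - tree_failures
            if new_failures:
                # Consistent failures not present on the tree: reject.
                return ("FAIL", new_failures, flaky)

        # Only flakiness remains; neither pass nor reject outright.
        return ("DEFER", frozenset(), flaky)
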