Log Message
CommitQueue and EWS should reject any patches that result in consistent test failures that aren't present on the tree. https://bugs.webkit.org/show_bug.cgi?id=138184
Patch by Jake Nielsen <[email protected]> on 2014-10-29 Reviewed by Alexey Proskuryakov. * Scripts/webkitpy/layout_tests/models/test_results.py: Adds a simple hashing function to allow for set operations to handle TestResult objects properly. (TestResult.__hash__): * Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py: Adds one unit test, and modifies others to agree with the notion that patches that introduce new test failures (but also have flakyness) should be rejected rather than spin. (MockCommitQueue.report_flaky_tests): (CommitQueueTaskTest._run_and_expect_patch_analysis_result): (test_double_flaky_test_failure): (test_two_flaky_tests): (test_very_flaky_patch): (test_very_flaky_patch_with_some_tree_redness): (test_different_test_failures): (test_different_test_failures_with_some_tree_redness): (test_different_test_failures_with_some_tree_redness_and_some_fixes): (test_mildly_flaky_patch): (test_mildly_flaky_patch_with_some_tree_redness): * Scripts/webkitpy/tool/bot/patchanalysistask.py: Makes PatchAnalysisTask reject said patches. (PatchAnalysisTask._test_patch):
Modified Paths
Diff
Modified: trunk/Tools/ChangeLog (175366 => 175367)
--- trunk/Tools/ChangeLog 2014-10-30 04:53:44 UTC (rev 175366)
+++ trunk/Tools/ChangeLog 2014-10-30 05:02:33 UTC (rev 175367)
@@ -1,3 +1,34 @@
+2014-10-29 Jake Nielsen <[email protected]>
+
+ CommitQueue and EWS should reject any patches that result in consistent test
+ failures that aren't present on the tree.
+ https://bugs.webkit.org/show_bug.cgi?id=138184
+
+ Reviewed by Alexey Proskuryakov.
+
+ * Scripts/webkitpy/layout_tests/models/test_results.py:
+ Adds a simple hashing function to allow for set operations to handle
+ TestResult objects properly.
+ (TestResult.__hash__):
+ * Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py:
+ Adds one unit test, and modifies others to agree with the notion that
+ patches that introduce new test failures (but also have flakyness)
+ should be rejected rather than spin.
+ (MockCommitQueue.report_flaky_tests):
+ (CommitQueueTaskTest._run_and_expect_patch_analysis_result):
+ (test_double_flaky_test_failure):
+ (test_two_flaky_tests):
+ (test_very_flaky_patch):
+ (test_very_flaky_patch_with_some_tree_redness):
+ (test_different_test_failures):
+ (test_different_test_failures_with_some_tree_redness):
+ (test_different_test_failures_with_some_tree_redness_and_some_fixes):
+ (test_mildly_flaky_patch):
+ (test_mildly_flaky_patch_with_some_tree_redness):
+ * Scripts/webkitpy/tool/bot/patchanalysistask.py:
+ Makes PatchAnalysisTask reject said patches.
+ (PatchAnalysisTask._test_patch):
+
2014-10-29 Youenn Fablet <[email protected]>
WinCairoRequirements.zip cannot be downloaded from dropbox
Modified: trunk/Tools/Scripts/webkitpy/layout_tests/models/test_results.py (175366 => 175367)
--- trunk/Tools/Scripts/webkitpy/layout_tests/models/test_results.py 2014-10-30 04:53:44 UTC (rev 175366)
+++ trunk/Tools/Scripts/webkitpy/layout_tests/models/test_results.py 2014-10-30 05:02:33 UTC (rev 175367)
@@ -64,6 +64,9 @@
def __ne__(self, other):
return not (self == other)
+ def __hash__(self):
+ return self.test_name.__hash__()
+
def has_failure_matching_types(self, *failure_classes):
for failure in self.failures:
if type(failure) in failure_classes:
Modified: trunk/Tools/Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py (175366 => 175367)
--- trunk/Tools/Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py 2014-10-30 04:53:44 UTC (rev 175366)
+++ trunk/Tools/Scripts/webkitpy/tool/bot/commitqueuetask_unittest.py 2014-10-30 05:02:33 UTC (rev 175367)
@@ -73,8 +73,9 @@
return LayoutTestResults(test_results=[], did_exceed_test_failure_limit=False)
def report_flaky_tests(self, patch, flaky_results, results_archive):
- self._flaky_tests = [result.test_name for result in flaky_results]
- _log.info("report_flaky_tests: patch='%s' flaky_tests='%s' archive='%s'" % (patch.id(), self._flaky_tests, results_archive.filename))
+ current_flaky_tests = [result.test_name for result in flaky_results]
+ self._flaky_tests += current_flaky_tests
+ _log.info("report_flaky_tests: patch='%s' flaky_tests='%s' archive='%s'" % (patch.id(), current_flaky_tests, results_archive.filename))
def get_reported_flaky_tests(self):
return self._flaky_tests
@@ -174,7 +175,7 @@
analysis_result = PatchAnalysisResult.FAIL
self.assertEqual(analysis_result, expected_analysis_result)
- self.assertEqual(commit_queue.get_reported_flaky_tests(), expected_reported_flaky_tests)
+ self.assertEqual(frozenset(commit_queue.get_reported_flaky_tests()), frozenset(expected_reported_flaky_tests))
self.assertEqual(commit_queue.did_run_clean_tests(), expect_clean_tests_to_run)
# The failure status only means anything if we actually failed.
@@ -378,7 +379,7 @@
# The (subtle) point of this test is that report_flaky_tests does not get
# called for this run.
# Note also that there is no attempt to run the tests w/o the patch.
- self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+ self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expected_reported_flaky_tests=["Fail1", "Fail2"])
def test_test_failure(self):
commit_queue = MockSimpleTestPlanCommitQueue(
@@ -421,7 +422,7 @@
clean_test_failures=["Fail1", "Fail2"])
# FIXME: This should pass, but as of right now, it defers.
- self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+ self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expected_reported_flaky_tests=["Fail1", "Fail2"])
def test_one_flaky_test(self):
commit_queue = MockSimpleTestPlanCommitQueue(
@@ -438,16 +439,16 @@
clean_test_failures=[])
# FIXME: This should actually fail, but right now it defers
- self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+ self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expected_reported_flaky_tests=["Fail1", "Fail2", "Fail3", "Fail4", "Fail5", "Fail6", "Fail7", "Fail8", "Fail9", "Fail10"])
def test_very_flaky_patch_with_some_tree_redness(self):
commit_queue = MockSimpleTestPlanCommitQueue(
- first_test_failures=["PreExistingFail1", "PreExistingFail2", "Fail6", "Fail7", "Fail8", "Fail9", "Fail10"],
- second_test_failures=["PreExistingFail1", "PreExistingFail2", "Fail1", "Fail2", "Fail3", "Fail4", "Fail5"],
+ first_test_failures=["PreExistingFail1", "PreExistingFail2", "Fail1", "Fail2", "Fail3", "Fail4", "Fail5"],
+ second_test_failures=["PreExistingFail1", "PreExistingFail2", "Fail6", "Fail7", "Fail8", "Fail9", "Fail10"],
clean_test_failures=["PreExistingFail1", "PreExistingFail2"])
# FIXME: This should actually fail, but right now it defers
- self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+ self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True, expected_reported_flaky_tests=["Fail1", "Fail2", "Fail3", "Fail4", "Fail5", "Fail6", "Fail7", "Fail8", "Fail9", "Fail10"])
def test_different_test_failures(self):
commit_queue = MockSimpleTestPlanCommitQueue(
@@ -455,8 +456,7 @@
second_test_failures=["Fail1", "Fail2", "Fail3", "Fail4", "Fail5"],
clean_test_failures=[])
- # FIXME: This should actually fail, but right now it defers
- self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+ self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.FAIL, expect_clean_tests_to_run=True, expected_reported_flaky_tests=["Fail6"], expected_failure_status_id=1)
def test_different_test_failures_with_some_tree_redness(self):
commit_queue = MockSimpleTestPlanCommitQueue(
@@ -464,16 +464,23 @@
second_test_failures=["PreExistingFail1", "PreExistingFail2", "Fail1", "Fail2", "Fail3", "Fail4", "Fail5"],
clean_test_failures=["PreExistingFail1", "PreExistingFail2"])
- # FIXME: This should actually fail, but right now it defers
- self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+ self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.FAIL, expect_clean_tests_to_run=True, expected_reported_flaky_tests=["Fail6"], expected_failure_status_id=1)
+ def test_different_test_failures_with_some_tree_redness_and_some_fixes(self):
+ commit_queue = MockSimpleTestPlanCommitQueue(
+ first_test_failures=["PreExistingFail1", "Fail1", "Fail2", "Fail3", "Fail4", "Fail5", "Fail6"],
+ second_test_failures=["PreExistingFail1", "Fail1", "Fail2", "Fail3", "Fail4", "Fail5"],
+ clean_test_failures=["PreExistingFail1", "PreExistingFail2"])
+
+ self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.FAIL, expect_clean_tests_to_run=True, expected_reported_flaky_tests=["Fail6"], expected_failure_status_id=1)
+
def test_mildly_flaky_patch(self):
commit_queue = MockSimpleTestPlanCommitQueue(
first_test_failures=["Fail1"],
second_test_failures=["Fail2"],
clean_test_failures=[])
- self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+ self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=False, expected_reported_flaky_tests=["Fail1", "Fail2"])
def test_mildly_flaky_patch_with_some_tree_redness(self):
commit_queue = MockSimpleTestPlanCommitQueue(
@@ -481,7 +488,7 @@
second_test_failures=["PreExistingFail1", "PreExistingFail2", "Fail2"],
clean_test_failures=["PreExistingFail1", "PreExistingFail2"])
- self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER)
+ self._run_and_expect_patch_analysis_result(commit_queue, PatchAnalysisResult.DEFER, expect_clean_tests_to_run=True, expected_reported_flaky_tests=["Fail1", "Fail2"])
def test_tree_more_red_than_patch(self):
commit_queue = MockSimpleTestPlanCommitQueue(
Modified: trunk/Tools/Scripts/webkitpy/tool/bot/patchanalysistask.py (175366 => 175367)
--- trunk/Tools/Scripts/webkitpy/tool/bot/patchanalysistask.py 2014-10-30 04:53:44 UTC (rev 175366)
+++ trunk/Tools/Scripts/webkitpy/tool/bot/patchanalysistask.py 2014-10-30 05:02:33 UTC (rev 175367)
@@ -225,11 +225,28 @@
return self._continue_testing_patch_that_exceeded_failure_limit_on_first_or_second_try(second_results, second_results_archive, second_script_error)
if self._results_failed_different_tests(first_results, second_results):
- # We could report flaky tests here, but we would need to be careful
- # to use similar checks to ExpectedFailures._can_trust_results
- # to make sure we don't report constant failures as flakes when
- # we happen to hit the --exit-after-N-failures limit.
- # See https://bugs.webkit.org/show_bug.cgi?id=51272
+ first_failing_results_set = frozenset(first_results.failing_test_results())
+ second_failing_results_set = frozenset(second_results.failing_test_results())
+
+ tests_that_only_failed_first = first_failing_results_set.difference(second_failing_results_set)
+ self._report_flaky_tests(tests_that_only_failed_first, first_results_archive)
+
+ tests_that_only_failed_second = second_failing_results_set.difference(first_failing_results_set)
+ self._report_flaky_tests(tests_that_only_failed_second, second_results_archive)
+
+ tests_that_consistently_failed = first_failing_results_set.intersection(second_failing_results_set)
+ if tests_that_consistently_failed:
+ self._build_and_test_without_patch()
+ self._clean_tree_results = self._delegate.test_results()
+ tests_that_failed_on_tree = self._clean_tree_results.failing_test_results()
+
+ new_failures_introduced_by_patch = tests_that_consistently_failed.difference(set(tests_that_failed_on_tree))
+ if new_failures_introduced_by_patch:
+ self.failure_status_id = first_failure_status_id
+ return self.report_failure(first_results_archive, new_failures_introduced_by_patch, first_script_error)
+
+ # At this point we know that there is flakyness with the patch applied, but no consistent failures
+ # were introduced. This is a bit of a grey-zone.
return False
if self._build_and_test_without_patch():
_______________________________________________ webkit-changes mailing list [email protected] https://lists.webkit.org/mailman/listinfo/webkit-changes
