Modified: trunk/Tools/BuildSlaveSupport/ews-build/steps.py (247568 => 247569)
--- trunk/Tools/BuildSlaveSupport/ews-build/steps.py 2019-07-18 18:57:35 UTC (rev 247568)
+++ trunk/Tools/BuildSlaveSupport/ews-build/steps.py 2019-07-18 19:03:17 UTC (rev 247569)
@@ -991,7 +991,7 @@
def evaluateCommand(self, cmd):
    """Evaluate the clean-tree test run and queue the result-processing steps.

    After the clean-tree run finishes we always archive/upload/extract its
    results and then hand off to AnalyzeLayoutTestsResults, which makes the
    final pass/fail/retry decision.
    """
    rc = shell.Test.evaluateCommand(self, cmd)
    followup_steps = [
        ArchiveTestResults(),
        UploadTestResults(identifier='clean-tree'),
        ExtractTestResults(identifier='clean-tree'),
        AnalyzeLayoutTestsResults(),
    ]
    self.build.addStepsAfterCurrentStep(followup_steps)
    return rc
def commandComplete(self, cmd):
@@ -1007,6 +1007,107 @@
self._parseRunWebKitTestsOutput(logText)
class AnalyzeLayoutTestsResults(buildstep.BuildStep):
    """Decide whether layout-test failures were introduced by the patch.

    Reads the failure sets recorded by the two with-patch runs and the
    clean-tree run (build properties ``first_run_failures``,
    ``second_run_failures``, ``clean_tree_run_failures`` plus the matching
    ``*_exceed_failure_limit`` flags) and finishes the build with FAILURE
    (new failures), SUCCESS (only pre-existing failures), or RETRY
    (results too noisy/flaky to judge).
    """
    name = 'analyze-layout-tests-results'
    description = ['analyze-layout-test-results']
    descriptionDone = ['analyze-layout-tests-results']

    def report_failure(self, new_failures):
        """Finish the build as FAILURE, naming the tests the patch broke.

        ``new_failures`` is a non-empty set of test names; the joined list is
        in set-iteration order (callers/tests rely on this exact string).
        """
        self.finished(FAILURE)
        self.build.results = FAILURE
        pluralSuffix = 's' if len(new_failures) > 1 else ''
        new_failures_string = ', '.join([failure_name for failure_name in new_failures])
        message = 'Found {} new Test failure{}: {}'.format(len(new_failures), pluralSuffix, new_failures_string)
        self.descriptionDone = message
        self.build.buildFinished([message], FAILURE)
        return defer.succeed(None)

    def report_pre_existing_failures(self, clean_tree_failures):
        """Finish the build as SUCCESS: every failure also occurs on a clean tree."""
        self.finished(SUCCESS)
        self.build.results = SUCCESS
        self.descriptionDone = 'Passed layout tests'
        pluralSuffix = 's' if len(clean_tree_failures) > 1 else ''
        message = 'Found {} pre-existing test failure{}'.format(len(clean_tree_failures), pluralSuffix)
        self.build.buildFinished([message], SUCCESS)
        return defer.succeed(None)

    def retry_build(self, message=''):
        """Finish the build as RETRY.

        Uses the caller-supplied ``message`` when given; otherwise falls back
        to the generic retry message. (Previously the parameter was silently
        overwritten and therefore dead.)
        """
        if not message:
            message = 'Unable to confirm if test failures are introduced by patch, retrying build'
        self.finished(RETRY)
        self.descriptionDone = message
        self.build.buildFinished([message], RETRY)
        return defer.succeed(None)

    def _results_failed_different_tests(self, first_results_failing_tests, second_results_failing_tests):
        # True when the two with-patch runs disagree, i.e. at least one test flaked.
        return first_results_failing_tests != second_results_failing_tests

    def _report_flaky_tests(self, flaky_tests):
        # TODO: implement this
        pass

    def start(self):
        first_results_did_exceed_test_failure_limit = self.getProperty('first_results_exceed_failure_limit')
        first_results_failing_tests = set(self.getProperty('first_run_failures', []))
        second_results_did_exceed_test_failure_limit = self.getProperty('second_results_exceed_failure_limit')
        second_results_failing_tests = set(self.getProperty('second_run_failures', []))
        clean_tree_results_did_exceed_test_failure_limit = self.getProperty('clean_tree_results_exceed_failure_limit')
        clean_tree_results_failing_tests = set(self.getProperty('clean_tree_run_failures', []))

        # Case 1: both with-patch runs blew past the failure limit.
        if first_results_did_exceed_test_failure_limit and second_results_did_exceed_test_failure_limit:
            if (len(first_results_failing_tests) - len(clean_tree_results_failing_tests)) <= 5:
                # If we've made it here, then many tests are failing with the patch applied, but
                # if the clean tree is also failing many tests, even if it's not quite as many,
                # then we can't be certain that the discrepancy isn't due to flakiness, and hence we must defer judgement.
                return self.retry_build()
            return self.report_failure(first_results_failing_tests)

        # Case 2: only the second run exceeded the limit; judge from the first run.
        if second_results_did_exceed_test_failure_limit:
            if clean_tree_results_did_exceed_test_failure_limit:
                return self.retry_build()
            failures_introduced_by_patch = first_results_failing_tests - clean_tree_results_failing_tests
            if failures_introduced_by_patch:
                return self.report_failure(failures_introduced_by_patch)
            return self.retry_build()

        # Case 3: only the first run exceeded the limit; judge from the second run.
        if first_results_did_exceed_test_failure_limit:
            if clean_tree_results_did_exceed_test_failure_limit:
                return self.retry_build()
            failures_introduced_by_patch = second_results_failing_tests - clean_tree_results_failing_tests
            if failures_introduced_by_patch:
                return self.report_failure(failures_introduced_by_patch)
            return self.retry_build()

        # Case 4: neither run exceeded the limit, but they failed different tests.
        if self._results_failed_different_tests(first_results_failing_tests, second_results_failing_tests):
            tests_that_only_failed_first = first_results_failing_tests.difference(second_results_failing_tests)
            self._report_flaky_tests(tests_that_only_failed_first)

            tests_that_only_failed_second = second_results_failing_tests.difference(first_results_failing_tests)
            self._report_flaky_tests(tests_that_only_failed_second)

            tests_that_consistently_failed = first_results_failing_tests.intersection(second_results_failing_tests)
            if tests_that_consistently_failed:
                if clean_tree_results_did_exceed_test_failure_limit:
                    return self.retry_build()
                failures_introduced_by_patch = tests_that_consistently_failed - clean_tree_results_failing_tests
                if failures_introduced_by_patch:
                    return self.report_failure(failures_introduced_by_patch)

            # At this point we know that at least one test flaked, but no consistent failures
            # were introduced. This is a bit of a grey-zone.
            return self.retry_build()

        # Case 5: both runs failed the exact same tests; compare against the clean tree.
        if clean_tree_results_did_exceed_test_failure_limit:
            return self.retry_build()
        failures_introduced_by_patch = first_results_failing_tests - clean_tree_results_failing_tests
        if failures_introduced_by_patch:
            return self.report_failure(failures_introduced_by_patch)

        # At this point, we know that the first and second runs had the exact same failures,
        # and that those failures are all present on the clean tree, so we can say with certainty
        # that the patch is good.
        return self.report_pre_existing_failures(clean_tree_results_failing_tests)
+
+
class RunWebKit1Tests(RunWebKitTests):
def start(self):
self.setCommand(self.command + ['--dump-render-tree'])
Modified: trunk/Tools/BuildSlaveSupport/ews-build/steps_unittest.py (247568 => 247569)
--- trunk/Tools/BuildSlaveSupport/ews-build/steps_unittest.py 2019-07-18 18:57:35 UTC (rev 247568)
+++ trunk/Tools/BuildSlaveSupport/ews-build/steps_unittest.py 2019-07-18 19:03:17 UTC (rev 247569)
@@ -34,7 +34,7 @@
from twisted.python import failure, log
from twisted.trial import unittest
-from steps import (AnalyzeAPITestsResults, AnalyzeCompileWebKitResults, ApplyPatch, ArchiveBuiltProduct, ArchiveTestResults,
+from steps import (AnalyzeAPITestsResults, AnalyzeCompileWebKitResults, AnalyzeLayoutTestsResults, ApplyPatch, ArchiveBuiltProduct, ArchiveTestResults,
CheckOutSource, CheckOutSpecificRevision, CheckPatchRelevance, CheckStyle, CleanBuild, CleanUpGitIndexLock, CleanWorkingDirectory,
CompileJSCOnly, CompileJSCOnlyToT, CompileWebKit, CompileWebKitToT, ConfigureBuild,
DownloadBuiltProduct, ExtractBuiltProduct, ExtractTestResults, InstallGtkDependencies, InstallWpeDependencies, KillOldProcesses,
@@ -1135,6 +1135,111 @@
return self.runStep()
class TestAnalyzeLayoutTestsResults(BuildStepMixinAdditions, unittest.TestCase):
    """Unit tests for the AnalyzeLayoutTestsResults build step.

    Each test seeds the build properties that the step reads
    (first/second/clean-tree failure lists and their exceed-limit flags)
    and asserts the resulting build outcome and state string.
    """
    def setUp(self):
        self.longMessage = True
        return self.setUpBuildStep()

    def tearDown(self):
        return self.tearDownBuildStep()

    def configureStep(self):
        # Common baseline: no run exceeded the failure limit and the
        # clean tree had no failures; individual tests override as needed.
        self.setupStep(AnalyzeLayoutTestsResults())
        self.setProperty('first_results_exceed_failure_limit', False)
        self.setProperty('second_results_exceed_failure_limit', False)
        self.setProperty('clean_tree_results_exceed_failure_limit', False)
        self.setProperty('clean_tree_run_failures', [])

    def test_failure_introduced_by_patch(self):
        # Same test fails in both with-patch runs but not on the clean tree -> FAILURE.
        self.configureStep()
        self.setProperty('first_run_failures', ["jquery/offset.html"])
        self.setProperty('second_run_failures', ["jquery/offset.html"])
        self.expectOutcome(result=FAILURE, state_string='Found 1 new Test failure: jquery/offset.html (failure)')
        return self.runStep()

    def test_failure_on_clean_tree(self):
        # The same failure also occurs on the clean tree -> pre-existing, SUCCESS.
        self.configureStep()
        self.setProperty('first_run_failures', ["jquery/offset.html"])
        self.setProperty('second_run_failures', ["jquery/offset.html"])
        self.setProperty('clean_tree_run_failures', ["jquery/offset.html"])
        self.expectOutcome(result=SUCCESS, state_string='Passed layout tests')
        return self.runStep()

    def test_flaky_and_consistent_failures_without_clean_tree_failures(self):
        # test2 flaked (first run only); test1 failed consistently and is
        # absent from the clean tree -> FAILURE naming only test1.
        self.configureStep()
        self.setProperty('first_run_failures', ['test1', 'test2'])
        self.setProperty('second_run_failures', ['test1'])
        self.expectOutcome(result=FAILURE, state_string='Found 1 new Test failure: test1 (failure)')
        return self.runStep()

    def test_flaky_and_inconsistent_failures_without_clean_tree_failures(self):
        # No overlap between the two runs -> everything flaked -> RETRY.
        self.configureStep()
        self.setProperty('first_run_failures', ['test1', 'test2'])
        self.setProperty('second_run_failures', ['test3'])
        self.expectOutcome(result=RETRY, state_string='Unable to confirm if test failures are introduced by patch, retrying build (retry)')
        return self.runStep()

    def test_flaky_and_inconsistent_failures_with_clean_tree_failures(self):
        # Disjoint flaky failures that all appear on the clean tree too -> RETRY.
        self.configureStep()
        self.setProperty('first_run_failures', ['test1', 'test2'])
        self.setProperty('second_run_failures', ['test3'])
        self.setProperty('clean_tree_run_failures', ['test1', 'test2', 'test3'])
        self.expectOutcome(result=RETRY, state_string='Unable to confirm if test failures are introduced by patch, retrying build (retry)')
        return self.runStep()

    def test_flaky_and_consistent_failures_with_clean_tree_failures(self):
        # test1 failed consistently but is also failing on the clean tree;
        # only flakes remain -> grey zone -> RETRY.
        self.configureStep()
        self.setProperty('first_run_failures', ['test1', 'test2'])
        self.setProperty('second_run_failures', ['test1'])
        self.setProperty('clean_tree_run_failures', ['test1', 'test2'])
        self.expectOutcome(result=RETRY, state_string='Unable to confirm if test failures are introduced by patch, retrying build (retry)')
        return self.runStep()

    def test_first_run_exceed_failure_limit(self):
        # Only the first run exceeded the limit; the (trusted) second run is
        # clean -> nothing attributable to the patch -> RETRY.
        self.configureStep()
        self.setProperty('first_results_exceed_failure_limit', True)
        self.setProperty('first_run_failures', ['test{}'.format(i) for i in range(0, 30)])
        self.setProperty('second_run_failures', [])
        self.expectOutcome(result=RETRY, state_string='Unable to confirm if test failures are introduced by patch, retrying build (retry)')
        return self.runStep()

    def test_second_run_exceed_failure_limit(self):
        # Mirror case: only the second run exceeded the limit -> RETRY.
        self.configureStep()
        self.setProperty('first_run_failures', [])
        self.setProperty('second_results_exceed_failure_limit', True)
        self.setProperty('second_run_failures', ['test{}'.format(i) for i in range(0, 30)])
        self.expectOutcome(result=RETRY, state_string='Unable to confirm if test failures are introduced by patch, retrying build (retry)')
        return self.runStep()

    def test_clean_tree_exceed_failure_limit(self):
        # The clean tree itself exceeded the limit -> cannot judge -> RETRY.
        self.configureStep()
        self.setProperty('first_run_failures', ['test1'])
        self.setProperty('second_run_failures', ['test1'])
        self.setProperty('clean_tree_results_exceed_failure_limit', True)
        self.setProperty('clean_tree_run_failures', ['test{}'.format(i) for i in range(0, 30)])
        self.expectOutcome(result=RETRY, state_string='Unable to confirm if test failures are introduced by patch, retrying build (retry)')
        return self.runStep()

    def test_clean_tree_has_lot_of_failures(self):
        # Both with-patch runs exceeded the limit and the clean tree is nearly
        # as bad (30 vs 27, within the <= 5 margin) -> defer judgement, RETRY.
        self.configureStep()
        self.setProperty('first_results_exceed_failure_limit', True)
        self.setProperty('first_run_failures', ['test{}'.format(i) for i in range(0, 30)])
        self.setProperty('second_results_exceed_failure_limit', True)
        self.setProperty('second_run_failures', ['test{}'.format(i) for i in range(0, 30)])
        self.setProperty('clean_tree_run_failures', ['test{}'.format(i) for i in range(0, 27)])
        self.expectOutcome(result=RETRY, state_string='Unable to confirm if test failures are introduced by patch, retrying build (retry)')
        return self.runStep()

    def test_clean_tree_has_some_failures(self):
        # Both with-patch runs exceeded the limit but the clean tree is much
        # healthier (30 vs 10, beyond the margin) -> blame the patch, FAILURE.
        # NOTE(review): the expected string depends on set-iteration order of
        # the failure names, so it is tied to the interpreter's hash behavior.
        self.configureStep()
        self.setProperty('first_results_exceed_failure_limit', True)
        self.setProperty('first_run_failures', ['test{}'.format(i) for i in range(0, 30)])
        self.setProperty('second_results_exceed_failure_limit', True)
        self.setProperty('second_run_failures', ['test{}'.format(i) for i in range(0, 30)])
        self.setProperty('clean_tree_run_failures', ['test{}'.format(i) for i in range(0, 10)])
        self.expectOutcome(result=FAILURE, state_string='Found 30 new Test failures: test1, test0, test3, test2, test5, test4, test7, test6, test9, test8, test24, test25, test26, test27, test20, test21, test22, test23, test28, test29, test19, test18, test11, test10, test13, test12, test15, test14, test17, test16 (failure)')
        return self.runStep()
+
class TestCheckOutSpecificRevision(BuildStepMixinAdditions, unittest.TestCase):
def setUp(self):
self.longMessage = True