HBASE-20387 ADDENDUM backport findHangingTests.py changes from master to branches-1.
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/d95e6642 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/d95e6642 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/d95e6642 Branch: refs/heads/branch-1.3 Commit: d95e664245b886da97f8ea3d0fbd080c37ef9db1 Parents: 0c0c723 Author: Sean Busbey <bus...@apache.org> Authored: Thu Aug 16 23:55:28 2018 -0500 Committer: Sean Busbey <bus...@apache.org> Committed: Fri Aug 17 00:02:29 2018 -0500 ---------------------------------------------------------------------- dev-support/flaky-tests/findHangingTests.py | 159 ++++++++++++++--------- 1 file changed, 96 insertions(+), 63 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/d95e6642/dev-support/flaky-tests/findHangingTests.py ---------------------------------------------------------------------- diff --git a/dev-support/flaky-tests/findHangingTests.py b/dev-support/flaky-tests/findHangingTests.py old mode 100644 new mode 100755 index deccc8b..328516e --- a/dev-support/flaky-tests/findHangingTests.py +++ b/dev-support/flaky-tests/findHangingTests.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python ## # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -15,68 +15,101 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -## -# script to find hanging test from Jenkins build output + +# pylint: disable=invalid-name +# To disable 'invalid constant name' warnings. + +""" +# Script to find hanging test from Jenkins build output # usage: ./findHangingTests.py <url of Jenkins build console> -# -import urllib2 +""" + +import re import sys -import string -if len(sys.argv) != 2 : - print "ERROR : Provide the jenkins job console URL as the only argument." - exit(1) -print "Fetching " + sys.argv[1] -response = urllib2.urlopen(sys.argv[1]) -i = 0; -tests = {} -failed_tests = {} -summary = 0 -host = False -patch = False -branch = False -while True: - n = response.readline() - if n == "" : - break - if not host and n.find("Building remotely on") >= 0: - host = True - print n.strip() - continue - if not patch and n.find("Testing patch for ") >= 0: - patch = True - print n.strip() - continue - if not branch and n.find("Testing patch on branch ") >= 0: - branch = True - print n.strip() - continue - if n.find("PATCH APPLICATION FAILED") >= 0: - print "PATCH APPLICATION FAILED" - sys.exit(1) - if summary == 0 and n.find("Running tests.") >= 0: - summary = summary + 1 - continue - if summary == 1 and n.find("[INFO] Reactor Summary:") >= 0: - summary = summary + 1 - continue - if summary == 2 and n.find("[INFO] Apache HBase ") >= 0: - sys.stdout.write(n) - continue - if n.find("org.apache.hadoop.hbase") < 0: - continue - test_name = string.strip(n[n.find("org.apache.hadoop.hbase"):len(n)]) - if n.find("Running org.apache.hadoop.hbase") > -1 : - tests[test_name] = False - if n.find("Tests run:") > -1 : - if n.find("FAILURE") > -1 or n.find("ERROR") > -1: - failed_tests[test_name] = True - tests[test_name] = True -response.close() +import requests + +# If any of these strings appear in the console output, it's a build one should probably ignore +# for analyzing failed/hanging tests. +BAD_RUN_STRINGS = [ + "Slave went offline during the build", # Machine went down, can't do anything about it. + "The forked VM terminated without properly saying goodbye", # JVM crashed. +] + + +def get_bad_tests(console_url): + """ + Returns [[all tests], [failed tests], [timeout tests], [hanging tests]] if successfully gets + the build information. + If there is error getting console text or if there are blacklisted strings in console text, + then returns None. + """ + response = requests.get(console_url) + if response.status_code != 200: + print "Error getting consoleText. Response = {} {}".format( + response.status_code, response.reason) + return + + # All tests: All testcases which were run. + # Hanging test: A testcase which started but never finished. + # Failed test: Testcase which encountered any kind of failure. It can be failing atomic tests, + # timed out tests, etc + # Timeout test: A Testcase which encountered timeout. Naturally, all timeout tests will be + # included in failed tests. + all_tests_set = set() + hanging_tests_set = set() + failed_tests_set = set() + timeout_tests_set = set() + for line in response.content.splitlines(): + result1 = re.findall("Running org.apache.hadoop.hbase.(.*)", line) + if len(result1) == 1: + test_case = result1[0] + if test_case in all_tests_set: + print ("ERROR! Multiple tests with same name '{}'. Might get wrong results " + "for this test.".format(test_case)) + else: + hanging_tests_set.add(test_case) + all_tests_set.add(test_case) + result2 = re.findall("Tests run:.*?- in org.apache.hadoop.hbase.(.*)", line) + if len(result2) == 1: + test_case = result2[0] + if "FAILURE!" in line: + failed_tests_set.add(test_case) + if test_case not in hanging_tests_set: + print ("ERROR! No test '{}' found in hanging_tests. Might get wrong results " + "for this test. This may also happen if maven is set to retry failing " + "tests.".format(test_case)) + else: + hanging_tests_set.remove(test_case) + result3 = re.match("^\\s+(\\w*).*\\sTestTimedOut", line) + if result3: + test_case = result3.group(1) + timeout_tests_set.add(test_case) + for bad_string in BAD_RUN_STRINGS: + if re.match(".*" + bad_string + ".*", line): + print "Bad string found in build:\n > {}".format(line) + print "Result > total tests: {:4} failed : {:4} timedout : {:4} hanging : {:4}".format( + len(all_tests_set), len(failed_tests_set), len(timeout_tests_set), len(hanging_tests_set)) + return [all_tests_set, failed_tests_set, timeout_tests_set, hanging_tests_set] + +if __name__ == "__main__": + if len(sys.argv) != 2: + print "ERROR : Provide the jenkins job console URL as the only argument." + sys.exit(1) + + print "Fetching {}".format(sys.argv[1]) + result = get_bad_tests(sys.argv[1]) + if not result: + sys.exit(1) + [all_tests, failed_tests, timedout_tests, hanging_tests] = result + + print "Found {} hanging tests:".format(len(hanging_tests)) + for test in hanging_tests: + print test + print "\n" + print "Found {} failed tests of which {} timed out:".format( + len(failed_tests), len(timedout_tests)) + for test in failed_tests: + print "{0} {1}".format(test, ("(Timed Out)" if test in timedout_tests else "")) -print "Printing hanging tests" -for key, value in tests.iteritems(): - if value == False: - print "Hanging test : " + key -print "Printing Failing tests" -for key, value in failed_tests.iteritems(): - print "Failing test : " + key + print ("\nA test may have had 0 or more atomic test failures before it timed out. So a " + "'Timed Out' test may have other errors too.")