HBASE-20387 turn flaky test tracking infra into per-branch pipeline. * gather up all the flaky test stuff into a directory * create Jenkins Pipeline DSL for the report generation and the flaky re-testing * have the nightly per-branch job consume the results of flaky reporting
Signed-off-by: Mike Drob <md...@apache.org> Conflicts: dev-support/Dockerfile dev-support/Jenkinsfile dev-support/flaky-tests/flaky-dashboard-template.html dev-support/flaky-tests/report-flakies.py For branches 1, includes a backport of the current version of report-flakies.py and supporting files. Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/18840e95 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/18840e95 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/18840e95 Branch: refs/heads/branch-1 Commit: 18840e9510ed9e3e1c8709938f1cc0bb732a3174 Parents: 971d484 Author: Sean Busbey <bus...@apache.org> Authored: Fri Aug 10 11:28:10 2018 -0500 Committer: Sean Busbey <bus...@apache.org> Committed: Thu Aug 16 23:24:11 2018 -0500 ---------------------------------------------------------------------- dev-support/Dockerfile | 29 ++ dev-support/Jenkinsfile | 6 +- dev-support/findHangingTests.py | 82 ------ dev-support/flaky-tests/findHangingTests.py | 82 ++++++ .../flaky-tests/flaky-dashboard-template.html | 199 +++++++++++++ .../flaky-tests/flaky-reporting.Jenkinsfile | 66 +++++ dev-support/flaky-tests/report-flakies.py | 280 +++++++++++++++++++ .../flaky-tests/run-flaky-tests.Jenkinsfile | 71 +++++ dev-support/hbase_nightly_yetus.sh | 4 - 9 files changed, 728 insertions(+), 91 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/18840e95/dev-support/Dockerfile ---------------------------------------------------------------------- diff --git a/dev-support/Dockerfile b/dev-support/Dockerfile new file mode 100644 index 0000000..2c3d61c --- /dev/null +++ b/dev-support/Dockerfile @@ -0,0 +1,29 @@ +## +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This Dockerfile is to setup environment for dev-support scripts which require +# dependencies outside of what Apache Jenkins machines may have. +# +# Specifically, it's used for the flaky test reporting job defined in +# dev-support/flaky-tests/flaky-reporting.Jenkinsfile +FROM ubuntu:14.04 + +ADD . /hbase/dev-support + +RUN apt-get -y update \ + && apt-get -y install curl python-pip \ + && pip install -r /hbase/dev-support/python-requirements.txt http://git-wip-us.apache.org/repos/asf/hbase/blob/18840e95/dev-support/Jenkinsfile ---------------------------------------------------------------------- diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index 7334a4a..0abeae0 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -47,11 +47,7 @@ pipeline { ARCHIVE_PATTERN_LIST = 'TEST-*.xml,org.apache.h*.txt,*.dumpstream,*.dump' // These tests currently have known failures. Once they burn down to 0, remove from here so that new problems will cause a failure. TESTS_FILTER = 'cc,checkstyle,javac,javadoc,pylint,shellcheck,whitespace,perlcritic,ruby-lint,rubocop,mvnsite,xml' - // Flaky urls for different branches. Replace '-' and '.' in branch name by '_' because those - // characters are not allowed in bash variable name. - // Not excluding flakies from the nightly build for now. - // EXCLUDE_TESTS_URL_master = 'https://builds.apache.org/job/HBase-Find-Flaky-Tests/lastSuccessfulBuild/artifact/excludes/' - // EXCLUDE_TESTS_URL_branch_2 = 'https://builds.apache.org/job/HBase-Find-Flaky-Tests-branch2.0/lastSuccessfulBuild/artifact/excludes/' + EXCLUDE_TESTS_URL = "${JENKINS_URL}/job/HBase-Find-Flaky-Tests/job/${BRANCH_NAME}/lastSuccessfulBuild/artifact/excludes" } parameters { booleanParam(name: 'USE_YETUS_PRERELEASE', defaultValue: false, description: '''Check to use the current HEAD of apache/yetus rather than our configured release. http://git-wip-us.apache.org/repos/asf/hbase/blob/18840e95/dev-support/findHangingTests.py ---------------------------------------------------------------------- diff --git a/dev-support/findHangingTests.py b/dev-support/findHangingTests.py deleted file mode 100644 index deccc8b..0000000 --- a/dev-support/findHangingTests.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/python -## -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -## -# script to find hanging test from Jenkins build output -# usage: ./findHangingTests.py <url of Jenkins build console> -# -import urllib2 -import sys -import string -if len(sys.argv) != 2 : - print "ERROR : Provide the jenkins job console URL as the only argument." - exit(1) -print "Fetching " + sys.argv[1] -response = urllib2.urlopen(sys.argv[1]) -i = 0; -tests = {} -failed_tests = {} -summary = 0 -host = False -patch = False -branch = False -while True: - n = response.readline() - if n == "" : - break - if not host and n.find("Building remotely on") >= 0: - host = True - print n.strip() - continue - if not patch and n.find("Testing patch for ") >= 0: - patch = True - print n.strip() - continue - if not branch and n.find("Testing patch on branch ") >= 0: - branch = True - print n.strip() - continue - if n.find("PATCH APPLICATION FAILED") >= 0: - print "PATCH APPLICATION FAILED" - sys.exit(1) - if summary == 0 and n.find("Running tests.") >= 0: - summary = summary + 1 - continue - if summary == 1 and n.find("[INFO] Reactor Summary:") >= 0: - summary = summary + 1 - continue - if summary == 2 and n.find("[INFO] Apache HBase ") >= 0: - sys.stdout.write(n) - continue - if n.find("org.apache.hadoop.hbase") < 0: - continue - test_name = string.strip(n[n.find("org.apache.hadoop.hbase"):len(n)]) - if n.find("Running org.apache.hadoop.hbase") > -1 : - tests[test_name] = False - if n.find("Tests run:") > -1 : - if n.find("FAILURE") > -1 or n.find("ERROR") > -1: - failed_tests[test_name] = True - tests[test_name] = True -response.close() - -print "Printing hanging tests" -for key, value in tests.iteritems(): - if value == False: - print "Hanging test : " + key -print "Printing Failing tests" -for key, value in failed_tests.iteritems(): - print "Failing test : " + key http://git-wip-us.apache.org/repos/asf/hbase/blob/18840e95/dev-support/flaky-tests/findHangingTests.py ---------------------------------------------------------------------- diff --git a/dev-support/flaky-tests/findHangingTests.py b/dev-support/flaky-tests/findHangingTests.py new file mode 100644 index 0000000..deccc8b --- /dev/null +++ b/dev-support/flaky-tests/findHangingTests.py @@ -0,0 +1,82 @@ +#!/usr/bin/python +## +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## +# script to find hanging test from Jenkins build output +# usage: ./findHangingTests.py <url of Jenkins build console> +# +import urllib2 +import sys +import string +if len(sys.argv) != 2 : + print "ERROR : Provide the jenkins job console URL as the only argument." + exit(1) +print "Fetching " + sys.argv[1] +response = urllib2.urlopen(sys.argv[1]) +i = 0; +tests = {} +failed_tests = {} +summary = 0 +host = False +patch = False +branch = False +while True: + n = response.readline() + if n == "" : + break + if not host and n.find("Building remotely on") >= 0: + host = True + print n.strip() + continue + if not patch and n.find("Testing patch for ") >= 0: + patch = True + print n.strip() + continue + if not branch and n.find("Testing patch on branch ") >= 0: + branch = True + print n.strip() + continue + if n.find("PATCH APPLICATION FAILED") >= 0: + print "PATCH APPLICATION FAILED" + sys.exit(1) + if summary == 0 and n.find("Running tests.") >= 0: + summary = summary + 1 + continue + if summary == 1 and n.find("[INFO] Reactor Summary:") >= 0: + summary = summary + 1 + continue + if summary == 2 and n.find("[INFO] Apache HBase ") >= 0: + sys.stdout.write(n) + continue + if n.find("org.apache.hadoop.hbase") < 0: + continue + test_name = string.strip(n[n.find("org.apache.hadoop.hbase"):len(n)]) + if n.find("Running org.apache.hadoop.hbase") > -1 : + tests[test_name] = False + if n.find("Tests run:") > -1 : + if n.find("FAILURE") > -1 or n.find("ERROR") > -1: + failed_tests[test_name] = True + tests[test_name] = True +response.close() + +print "Printing hanging tests" +for key, value in tests.iteritems(): + if value == False: + print "Hanging test : " + key +print "Printing Failing tests" +for key, value in failed_tests.iteritems(): + print "Failing test : " + key http://git-wip-us.apache.org/repos/asf/hbase/blob/18840e95/dev-support/flaky-tests/flaky-dashboard-template.html ---------------------------------------------------------------------- diff --git a/dev-support/flaky-tests/flaky-dashboard-template.html b/dev-support/flaky-tests/flaky-dashboard-template.html new file mode 100644 index 0000000..f37c7d5 --- /dev/null +++ b/dev-support/flaky-tests/flaky-dashboard-template.html @@ -0,0 +1,199 @@ +<!-- + - Licensed to the Apache Software Foundation (ASF) under one + - or more contributor license agreements. See the NOTICE file + - distributed with this work for additional information + - regarding copyright ownership. The ASF licenses this file + - to you under the Apache License, Version 2.0 (the + - "License"); you may not use this file except in compliance + - with the License. You may obtain a copy of the License at + - + - http://www.apache.org/licenses/LICENSE-2.0 + - + - Unless required by applicable law or agreed to in writing, software + - distributed under the License is distributed on an "AS IS" BASIS, + - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + - See the License for the specific language governing permissions and + - limitations under the License. + --> +<!DOCTYPE html> +<html> +<head> + <title>Apache HBase Flaky Dashboard</title> + <style type="text/css"> + table { + table-layout: fixed; + } + th { + font-size: 15px; + } + td { + font-size: 18px; + vertical-align: text-top; + overflow: hidden; + white-space: nowrap; + } + .show_hide_button { + font-size: 100%; + padding: .5em 1em; + border: 0 rgba(0,0,0,0); + border-radius: 10px; + } + </style> +</head> +<body> + <script src="https://d3js.org/d3.v3.min.js"></script> + <script> + var width = 300; + var height = 25; + var x = d3.scale.linear().range([0, width]); + + function csvToArray(csv) { + if (csv.length == 0) + return []; + splits = csv.split(","); + ret = []; + for (i = 0; i < splits.length; i++) { + ret.push(parseInt(splits[i])); + } + return ret; + } + + function sortNumber(a,b) { + return a - b; + } + + function sparkline(elemId, failed, timeout, hanging, success, domain_min, domain_max) { + failed = csvToArray(failed); + timeout = csvToArray(timeout); + hanging = csvToArray(hanging); + success = csvToArray(success); + all = failed.concat(timeout).concat(hanging).concat(success); + all.sort(sortNumber); + x.domain([domain_min, domain_max + 1]); + rect_width = x(domain_min + 1) - x(domain_min) - 1; + svg = d3.select("#" + elemId).append('svg').attr('width', width).attr('height', height); + svg.selectAll("dot") + .data(all) + .enter() + .append("svg:rect") + .attr("x", function(d) { return x(d); }) + .attr("y", 3) + .attr("height", height- 6) + .attr("width", rect_width) + .attr("fill", function(d) { + if (success.includes(d)) return "green"; + else if (timeout.includes(d)) return "gold"; + else if (hanging.includes(d)) return "blue"; + else if (failed.includes(d)) return "red"; + else return "black"; + }) + .append('svg:title') + .text(function(d) { return d; }); + } + </script> +<p> + <img style="vertical-align:middle; display:inline-block;" height="80px" + src="https://hbase.apache.org/images/hbase_logo_with_orca_large.png"> + + <span style="font-size:50px; vertical-align:middle; display:inline-block;"> + Apache HBase Flaky Tests Dashboard + </span> +</p> +<span>Last updated: <b>{{datetime}}</b></span><br> +<span>Count of flaky tests (cumulated from all jobs): + <b>{{bad_tests_count}}</b></span><br> +<br><br> +<span style="font-size:20px;"><b>List of Jobs</b></span><br> +<br> +{% for url in results %} +<a href="#job_{{ loop.index }}">{{ url |e }}</a> +<br> +{% endfor %} +<br> +<br> +<span style="font-size:20px;"><b>Results</b></span><br> +<br> +{% for url in results %} +{% set result = results[url] %} +{% set url_counter = loop.index %} +{# Dedup ids since test names may duplicate across urls #} +<span id="job_{{ url_counter }}" style="font-weight:bold;"> + {{ url |e }}<br> + <a href="{{ url |e }}"> + Go to <img height="16px" src="https://jenkins.io/sites/default/files/jenkins_favicon.ico"> + </a> + + <a href="#">Go to top</a> +</span> +<br/><br/> +Legend : green: success, red: failed, yellow: timeout, blue: hanging +<table> + <tr> + <th width="400px">Test Name</th> + <th width="150px">Flakyness</th> + <th width="200px">Failed/Timeout/Hanging</th> + <th width="300px">Trends</th> + <th>Run Ids</th> + </tr> + {% for test in result %} + {% set all = result[test]['all'] %} + {% set failed = result[test]['failed'] %} + {% set timeout = result[test]['timeout'] %} + {% set hanging = result[test]['hanging'] %} + {% set success = result[test]['success'] %} + <tr> + <td>{{ test |e }}</td> + {% set flakyness = + (failed|length + hanging|length) * 100 / all|length %} + {% if flakyness == 100 %} + <td align="middle" style="background-color:#FF9999;"> + {% else %} + <td align="middle"> + {% endif %} + {{ "{:.1f}% ({} / {})".format( + flakyness, failed|length + hanging|length, all|length) }} + </td> + <td align="middle"> + {{ failed|length }} / {{ timeout|length }} / {{ hanging|length }} + </td> + {# Replace '.' in test names with '_' because dots are part of css selectors. #} + {% set sparkline_id = "sparkline_" ~ test|replace(".","_") ~ "_" ~ url_counter %} + <td id="{{ sparkline_id }}" align="middle"> + </td> + <script>sparkline("{{ sparkline_id }}", "{{ failed|join(',') }}", "{{ timeout|join(',') }}", + "{{ hanging|join(',') }}", "{{ success|join(',') }}", {{ build_ids[url][0] }}, + {{ build_ids[url][-1] }});</script> + <td> + {% set id = "details_" ~ test ~ "_" ~ url_counter %} + <button class="show_hide_button" onclick="toggle('{{ id }}')"> + show/hide</button> + <br/> + <div id="{{ id }}" + style="display: none; width:300px; white-space: normal"> + {% macro print_run_ids(url, run_ids) -%} + {% for i in run_ids %} + <a href="{{ url }}/{{ i }}">{{ i }}</a> + {% endfor %} + {%- endmacro %} + Failed : {{ print_run_ids(url, failed) }}<br/> + Timed Out : {{ print_run_ids(url, timeout) }}<br/> + Hanging : {{ print_run_ids(url, hanging) }}<br/> + Succeeded : {{ print_run_ids(url, success) }} + </div> + </td> + </tr> + {% endfor %} +</table> +<br><br><br> +{% endfor %} +<script type="text/javascript"> + function toggle(id) { + if (document.getElementById(id).style["display"] == "none") { + document.getElementById(id).style["display"] = "block"; + } else { + document.getElementById(id).style["display"] = "none"; + } + } +</script> +</body> +</html> http://git-wip-us.apache.org/repos/asf/hbase/blob/18840e95/dev-support/flaky-tests/flaky-reporting.Jenkinsfile ---------------------------------------------------------------------- diff --git a/dev-support/flaky-tests/flaky-reporting.Jenkinsfile b/dev-support/flaky-tests/flaky-reporting.Jenkinsfile new file mode 100644 index 0000000..dfe5b66 --- /dev/null +++ b/dev-support/flaky-tests/flaky-reporting.Jenkinsfile @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +pipeline { + agent { + node { + label 'Hadoop' + } + } + triggers { + cron('@daily') + } + options { + buildDiscarder(logRotator(numToKeepStr: '100')) + timeout (time: 15, unit: 'MINUTES') + timestamps() + } + parameters { + booleanParam(name: 'DEBUG', defaultValue: false, description: 'Produce a lot more meta-information.') + } + stages { + stage ('build flaky report') { + steps { + sh '''#!/usr/bin/env bash + set -e + if [ "${DEBUG}" = "true" ]; then + set -x + fi + declare -a flaky_args + flaky_args=("${flaky_args[@]}" --urls "${JENKINS_URL}/job/HBase%20Nightly/job/${BRANCH_NAME}" --is-yetus True --max-builds 5) + flaky_args=("${flaky_args[@]}" --urls "${JENKINS_URL}/job/HBase-Flaky-Tests/job/${BRANCH_NAME}" --is-yetus False --max-builds 40) + docker build -t hbase-dev-support dev-support + docker run -v "${WORKSPACE}":/hbase --workdir=/hbase hbase-dev-support python dev-support/flaky-tests/report-flakies.py --mvn -v "${flaky_args[@]}" +''' + } + } + } + post { + always { + // Has to be relative to WORKSPACE. + archive "includes,excludes,dashboard.html" + publishHTML target: [ + allowMissing: true, + keepAll: true, + alwaysLinkToLastBuild: true, + // Has to be relative to WORKSPACE + reportDir: ".", + reportFiles: 'dashboard.html', + reportName: 'Flaky Test Report' + ] + } + } +} http://git-wip-us.apache.org/repos/asf/hbase/blob/18840e95/dev-support/flaky-tests/report-flakies.py ---------------------------------------------------------------------- diff --git a/dev-support/flaky-tests/report-flakies.py b/dev-support/flaky-tests/report-flakies.py new file mode 100755 index 0000000..1b3161a --- /dev/null +++ b/dev-support/flaky-tests/report-flakies.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python +## +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=invalid-name +# To disable 'invalid constant name' warnings. +# pylint: disable=import-error +# Testing environment may not have all dependencies. + +""" +This script uses Jenkins REST api to collect test result(s) of given build/builds and generates +flakyness data about unittests. +Print help: report-flakies.py -h +""" + +import argparse +import logging +import os +import time +from collections import OrderedDict +from jinja2 import Template + +import requests + +import findHangingTests + +parser = argparse.ArgumentParser() +parser.add_argument( + '--urls', metavar='URL', action='append', required=True, + help='Urls to analyze, which can refer to simple projects, multi-configuration projects or ' + 'individual build run.') +parser.add_argument('--excluded-builds', metavar='n1,n2', action='append', + help='List of build numbers to exclude (or "None"). Not required, ' + 'but if specified, number of uses should be same as that of --urls ' + 'since the values are matched.') +parser.add_argument('--max-builds', metavar='n', action='append', type=int, + help='The maximum number of builds to use (if available on jenkins). Specify ' + '0 to analyze all builds. Not required, but if specified, number of uses ' + 'should be same as that of --urls since the values are matched.') +parser.add_argument('--is-yetus', metavar='True/False', action='append', choices=['True', 'False'], + help='True, if build is yetus style i.e. look for maven output in artifacts; ' + 'False, if maven output is in <url>/consoleText itself.') +parser.add_argument( + "--mvn", action="store_true", + help="Writes two strings for including/excluding these flaky tests using maven flags. These " + "strings are written to files so they can be saved as artifacts and easily imported in " + "other projects. Also writes timeout and failing tests in separate files for " + "reference.") +parser.add_argument("-v", "--verbose", help="Prints more logs.", action="store_true") +args = parser.parse_args() + +logging.basicConfig() +logger = logging.getLogger(__name__) +if args.verbose: + logger.setLevel(logging.INFO) + + +def get_bad_tests(build_url, is_yetus): + """ + Given url of an executed build, analyzes its maven output, and returns + [list of all tests, list of timeout tests, list of failed tests]. + Returns None if can't get maven output from the build or if there is any other error. + """ + logger.info("Analyzing %s", build_url) + needed_fields="_class,building" + if is_yetus: + needed_fields+=",artifacts[fileName,relativePath]" + response = requests.get(build_url + "/api/json?tree=" + needed_fields).json() + if response["building"]: + logger.info("Skipping this build since it is in progress.") + return {} + console_url = None + if is_yetus: + for artifact in response["artifacts"]: + if artifact["fileName"] == "patch-unit-root.txt": + console_url = build_url + "/artifact/" + artifact["relativePath"] + break + if console_url is None: + logger.info("Can't find 'patch-unit-root.txt' artifact for Yetus build %s\n. Ignoring " + "this build.", build_url) + return + else: + console_url = build_url + "/consoleText" + build_result = findHangingTests.get_bad_tests(console_url) + if not build_result: + logger.info("Ignoring build %s", build_url) + return + return build_result + + +def expand_multi_config_projects(cli_args): + """ + If any url is of type multi-configuration project (i.e. has key 'activeConfigurations'), + get urls for individual jobs. + """ + job_urls = cli_args.urls + excluded_builds_arg = cli_args.excluded_builds + max_builds_arg = cli_args.max_builds + is_yetus_arg = cli_args.is_yetus + if excluded_builds_arg is not None and len(excluded_builds_arg) != len(job_urls): + raise Exception("Number of --excluded-builds arguments should be same as that of --urls " + "since values are matched.") + if max_builds_arg is not None and len(max_builds_arg) != len(job_urls): + raise Exception("Number of --max-builds arguments should be same as that of --urls " + "since values are matched.") + final_expanded_urls = [] + for (i, job_url) in enumerate(job_urls): + max_builds = 10000 # Some high number + is_yetus = False + if is_yetus_arg is not None: + is_yetus = is_yetus_arg[i] == "True" + if max_builds_arg is not None and max_builds_arg[i] != 0: + max_builds = int(max_builds_arg[i]) + excluded_builds = [] + if excluded_builds_arg is not None and excluded_builds_arg[i] != "None": + excluded_builds = [int(x) for x in excluded_builds_arg[i].split(",")] + request = requests.get(job_url + "/api/json?tree=_class,activeConfigurations%5Burl%5D") + if request.status_code != 200: + raise Exception("Failed to get job information from jenkins for url '" + job_url + + "'. Jenkins returned HTTP status " + str(request.status_code)) + response = request.json() + if response.has_key("activeConfigurations"): + for config in response["activeConfigurations"]: + final_expanded_urls.append({'url':config["url"], 'max_builds': max_builds, + 'excludes': excluded_builds, 'is_yetus': is_yetus}) + else: + final_expanded_urls.append({'url':job_url, 'max_builds': max_builds, + 'excludes': excluded_builds, 'is_yetus': is_yetus}) + return final_expanded_urls + + +# Set of timeout/failed tests across all given urls. +all_timeout_tests = set() +all_failed_tests = set() +all_hanging_tests = set() +# Contains { <url> : { <bad_test> : { 'all': [<build ids>], 'failed': [<build ids>], +# 'timeout': [<build ids>], 'hanging': [<builds ids>] } } } +url_to_bad_test_results = OrderedDict() +# Contains { <url> : [run_ids] } +# Used for common min/max build ids when generating sparklines. +url_to_build_ids = OrderedDict() + +# Iterates over each url, gets test results and prints flaky tests. +expanded_urls = expand_multi_config_projects(args) +for url_max_build in expanded_urls: + url = url_max_build["url"] + excludes = url_max_build["excludes"] + json_response = requests.get(url + "/api/json?tree=id,builds%5Bnumber,url%5D").json() + if json_response.has_key("builds"): + builds = json_response["builds"] + logger.info("Analyzing job: %s", url) + else: + builds = [{'number': json_response["id"], 'url': url}] + logger.info("Analyzing build : %s", url) + build_id_to_results = {} + num_builds = 0 + url_to_build_ids[url] = [] + build_ids_without_tests_run = [] + for build in builds: + build_id = build["number"] + if build_id in excludes: + continue + result = get_bad_tests(build["url"], url_max_build['is_yetus']) + if not result: + continue + if len(result[0]) > 0: + build_id_to_results[build_id] = result + else: + build_ids_without_tests_run.append(build_id) + num_builds += 1 + url_to_build_ids[url].append(build_id) + if num_builds == url_max_build["max_builds"]: + break + url_to_build_ids[url].sort() + + # Collect list of bad tests. + bad_tests = set() + for build in build_id_to_results: + [_, failed_tests, timeout_tests, hanging_tests] = build_id_to_results[build] + all_timeout_tests.update(timeout_tests) + all_failed_tests.update(failed_tests) + all_hanging_tests.update(hanging_tests) + # Note that timedout tests are already included in failed tests. + bad_tests.update(failed_tests.union(hanging_tests)) + + # For each bad test, get build ids where it ran, timed out, failed or hanged. + test_to_build_ids = {key : {'all' : set(), 'timeout': set(), 'failed': set(), + 'hanging' : set(), 'bad_count' : 0} + for key in bad_tests} + for build in build_id_to_results: + [all_tests, failed_tests, timeout_tests, hanging_tests] = build_id_to_results[build] + for bad_test in test_to_build_ids: + is_bad = False + if all_tests.issuperset([bad_test]): + test_to_build_ids[bad_test]["all"].add(build) + if timeout_tests.issuperset([bad_test]): + test_to_build_ids[bad_test]['timeout'].add(build) + is_bad = True + if failed_tests.issuperset([bad_test]): + test_to_build_ids[bad_test]['failed'].add(build) + is_bad = True + if hanging_tests.issuperset([bad_test]): + test_to_build_ids[bad_test]['hanging'].add(build) + is_bad = True + if is_bad: + test_to_build_ids[bad_test]['bad_count'] += 1 + + # Calculate flakyness % and successful builds for each test. Also sort build ids. + for bad_test in test_to_build_ids: + test_result = test_to_build_ids[bad_test] + test_result['flakyness'] = test_result['bad_count'] * 100.0 / len(test_result['all']) + test_result['success'] = (test_result['all'].difference( + test_result['failed'].union(test_result['hanging']))) + for key in ['all', 'timeout', 'failed', 'hanging', 'success']: + test_result[key] = sorted(test_result[key]) + + + # Sort tests in descending order by flakyness. + sorted_test_to_build_ids = OrderedDict( + sorted(test_to_build_ids.iteritems(), key=lambda x: x[1]['flakyness'], reverse=True)) + url_to_bad_test_results[url] = sorted_test_to_build_ids + + if len(sorted_test_to_build_ids) > 0: + print "URL: {}".format(url) + print "{:>60} {:10} {:25} {}".format( + "Test Name", "Total Runs", "Bad Runs(failed/timeout/hanging)", "Flakyness") + for bad_test in sorted_test_to_build_ids: + test_status = sorted_test_to_build_ids[bad_test] + print "{:>60} {:10} {:7} ( {:4} / {:5} / {:5} ) {:2.0f}%".format( + bad_test, len(test_status['all']), test_status['bad_count'], + len(test_status['failed']), len(test_status['timeout']), + len(test_status['hanging']), test_status['flakyness']) + else: + print "No flaky tests founds." + if len(url_to_build_ids[url]) == len(build_ids_without_tests_run): + print "None of the analyzed builds have test result." + + print "Builds analyzed: {}".format(url_to_build_ids[url]) + print "Builds without any test runs: {}".format(build_ids_without_tests_run) + print "" + + +all_bad_tests = all_hanging_tests.union(all_failed_tests) +if args.mvn: + includes = ",".join(all_bad_tests) + with open("./includes", "w") as inc_file: + inc_file.write(includes) + + excludes = ["**/{0}.java".format(bad_test) for bad_test in all_bad_tests] + with open("./excludes", "w") as exc_file: + exc_file.write(",".join(excludes)) + + with open("./timeout", "w") as timeout_file: + timeout_file.write(",".join(all_timeout_tests)) + + with open("./failed", "w") as failed_file: + failed_file.write(",".join(all_failed_tests)) + +dev_support_dir = os.path.dirname(os.path.abspath(__file__)) +with open(os.path.join(dev_support_dir, "flaky-dashboard-template.html"), "r") as f: + template = Template(f.read()) + +with open("dashboard.html", "w") as f: + datetime = time.strftime("%m/%d/%Y %H:%M:%S") + f.write(template.render(datetime=datetime, bad_tests_count=len(all_bad_tests), + results=url_to_bad_test_results, build_ids=url_to_build_ids)) http://git-wip-us.apache.org/repos/asf/hbase/blob/18840e95/dev-support/flaky-tests/run-flaky-tests.Jenkinsfile ---------------------------------------------------------------------- diff --git a/dev-support/flaky-tests/run-flaky-tests.Jenkinsfile b/dev-support/flaky-tests/run-flaky-tests.Jenkinsfile new file mode 100644 index 0000000..cbb75c1 --- /dev/null +++ b/dev-support/flaky-tests/run-flaky-tests.Jenkinsfile @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +pipeline { + agent { + node { + label 'Hadoop' + } + } + triggers { + cron('@hourly') + } + options { + // this should roughly match how long we tell the flaky dashboard to look at + buildDiscarder(logRotator(numToKeepStr: '80')) + timeout (time: 2, unit: 'HOURS') + timestamps() + } + parameters { + booleanParam(name: 'DEBUG', defaultValue: false, description: 'Produce a lot more meta-information.') + } + tools { + // this should match what the yetus nightly job for the branch will use + maven 'Maven (latest)' + jdk "JDK 1.8 (latest)" + } + stages { + stage ('run flaky tests') { + steps { + sh '''#!/usr/bin/env bash + set -e + declare -a curl_args=(--fail) + declare -a mvn_args=(--batch-mode -fn -Dbuild.id="${BUILD_ID}" -Dmaven.repo.local="${WORKSPACE}/local-repository") + if [ "${DEBUG}" = "true" ]; then + curl_args=("${curl_args[@]}" -v) + mvn_args=("${mvn_args[@]}" -X) + set -x + fi + ulimit -a + rm -rf local-repository/org/apache/hbase + curl "${curl_args[@]}" -o includes.txt "${JENKINS_URL}/job/HBase-Find-Flaky-Tests/job/${BRANCH_NAME}/lastSuccessfulBuild/artifact/includes" + if [ -s includes.txt ]; then + mvn clean package "${mvn_args[@]}" -Dtest="$(cat includes.txt)" -Dmaven.test.redirectTestOutputToFile=true -Dsurefire.firstPartForkCount=3 -Dsurefire.secondPartForkCount=3 + else + echo "set of flaky tests is currently empty." + fi +''' + } + } + } + post { + always { + junit testResults: "**/surefire-reports/*.xml", allowEmptyResults: true + // TODO compress these logs + archive 'includes.txt,**/surefire-reports/*,**/test-data/*' + } + } +} http://git-wip-us.apache.org/repos/asf/hbase/blob/18840e95/dev-support/hbase_nightly_yetus.sh ---------------------------------------------------------------------- diff --git a/dev-support/hbase_nightly_yetus.sh b/dev-support/hbase_nightly_yetus.sh index bba5f4d..185a288 100755 --- a/dev-support/hbase_nightly_yetus.sh +++ b/dev-support/hbase_nightly_yetus.sh @@ -71,10 +71,6 @@ YETUS_ARGS=("--tests-filter=${TESTS_FILTER}" "${YETUS_ARGS[@]}") YETUS_ARGS=("--proclimit=10000" "${YETUS_ARGS[@]}") YETUS_ARGS=("--dockermemlimit=20g" "${YETUS_ARGS[@]}") -# Currently, flaky list is calculated only for master branch. -UNDERSCORED_BRANCH_NAME=$(echo ${BRANCH_NAME} | tr '.-' '_') -EXCLUDE_TESTS_URL=$(eval echo "\$EXCLUDE_TESTS_URL_${UNDERSCORED_BRANCH_NAME}") -INCLUDE_TESTS_URL=$(eval echo "\$INCLUDE_TESTS_URL_${UNDERSCORED_BRANCH_NAME}") if [[ -n "${EXCLUDE_TESTS_URL}" ]]; then YETUS_ARGS=("--exclude-tests-url=${EXCLUDE_TESTS_URL}" "${YETUS_ARGS[@]}") fi