This is an automated email from the ASF dual-hosted git repository. mhanson pushed a commit to branch release/1.11.0 in repository https://gitbox.apache.org/repos/asf/geode.git
commit 6e7681ddeeb9f669b9c74941424676f6293c2b7e Author: Helena Bales <[email protected]> AuthorDate: Mon Dec 16 11:16:55 2019 -0800 GEODE-7554: Add retry mechanism for failed tests (#4461) * GEODE-7554: Add retry mechanism for failed tests There are still some flaky benchmarks. Retry tests that have failed, up to 5 times, to determine if the failure is legitimate or just a flaky test. If the test fails 5 times in a row, we know that it is a legitimate failure. With each run of run_against_baseline (in the apache/geode-benchmarks repo), the class names of all failed tests will be written to a file. The run_benchmarks script called by CI will read the failed tests from the file and run only those tests. Once the return code is 0, or once we've tried 5 times, exit. * build failedFile path dynamically Signed-off-by: Helena A. Bales <[email protected]> * add print of iteration number for CI debugging of failures Signed-off-by: Robert Houghton <[email protected]> * triggering CI again. Previous status never updated Signed-off-by: Helena A. 
Bales <[email protected]> * refactor if statement for readability * restarting CI * remove hardcoded cluster tag (cherry picked from commit 278c2470a5cac2c332d13914b935f0618b820a91)
---
 ci/scripts/run_benchmarks.sh | 46 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 5 deletions(-)

diff --git a/ci/scripts/run_benchmarks.sh b/ci/scripts/run_benchmarks.sh
index 92f072d..ae18968 100755
--- a/ci/scripts/run_benchmarks.sh
+++ b/ci/scripts/run_benchmarks.sh
@@ -41,15 +41,51 @@ pushd geode
 GEODE_SHA=$(git rev-parse --verify HEAD)
 popd
 
+input="$(pwd)/results/failedTests"  # list of failed test classes, one per line; no space after '=' or bash runs the path as a command
+
 pushd geode-benchmarks/infrastructure/scripts/aws/
 ./launch_cluster.sh -t ${CLUSTER_TAG} -c ${CLUSTER_COUNT} --ci
 
-if [ -z "${BASELINE_VERSION}" ]; then
-  ./run_against_baseline.sh -t ${CLUSTER_TAG} -b ${GEODE_SHA} -B ${BASELINE_BRANCH} -e ${BENCHMARKS_BRANCH} -o ${RESULTS_DIR} -m "'source':'geode-ci','benchmark_branch':'${BENCHMARK_BRANCH}','baseline_branch':'${BASELINE_BRANCH}','geode_branch':'${GEODE_SHA}'" --ci -- ${FLAGS} ${TEST_OPTIONS}
-else
-  ./run_against_baseline.sh -t ${CLUSTER_TAG} -b ${GEODE_SHA} -V ${BASELINE_VERSION} -e ${BENCHMARKS_BRANCH} -o ${RESULTS_DIR} -m "'source':'geode-ci','benchmark_branch':'${BENCHMARK_BRANCH}','baseline_version':'${BASELINE_VERSION}','geode_branch':'${GEODE_SHA}'" --ci -- ${FLAGS} ${TEST_OPTIONS}
-fi
+# Test retry loop (at most 5 attempts): if a previous attempt left a failed-tests file, overwrite
+# TEST_OPTIONS with only those tests. A failing attempt only aborts the script on the last iteration.
+for i in {1..5}
+do
+  echo "This is ITERATION ${i} of benchmarking against baseline."
+
+  if [[ -f "${input}" ]]; then
+    unset TEST_OPTIONS
+    TEST_OPTIONS=""
+    while IFS= read -r line || [[ -n "${line}" ]]; do  # also accept a final line without a trailing newline
+      test=" --tests $line"
+      TEST_OPTIONS=${TEST_OPTIONS}${test}
+    done < "${input}"
+
+    rm -- "${input}"  # the list has been consumed into TEST_OPTIONS
+  fi
+
+  if [[ ${i} != 5 ]]; then
+    set +e  # not the last attempt: let a failure fall through to the exit-status check below
+  fi
+  if [ -z "${BASELINE_VERSION}" ]; then
+    BASELINE_OPTION="-B ${BASELINE_BRANCH}"
+    METADATA_BASELINE="'benchmark_branch':'${BASELINE_BRANCH}'"
+  else
+    BASELINE_OPTION="-V ${BASELINE_VERSION}"
+    METADATA_BASELINE="'benchmark_version':'${BASELINE_VERSION}'"
+  fi
+  ./run_on_cluster.sh -t ${CLUSTER_TAG} -- pkill -9 java || true  # best-effort cleanup; must not abort the final attempt under errexit
+  ./run_on_cluster.sh -t ${CLUSTER_TAG} -- rm -f /home/geode/locator10334view.dat || true  # best-effort cleanup, as above
+  ./run_against_baseline.sh -t ${CLUSTER_TAG} -b ${GEODE_SHA} ${BASELINE_OPTION} -e ${BENCHMARKS_BRANCH} -o ${RESULTS_DIR} -m "'source':'geode-ci',${METADATA_BASELINE},'baseline_branch':'${BASELINE_BRANCH}','geode_branch':'${GEODE_SHA}'" --ci -- ${FLAGS} ${TEST_OPTIONS}
+
+  if [[ $? -eq 0 ]]; then
+    break;
+  fi
+
+  if [[ ${i} != 5 ]]; then
+    set -e  # re-enable errexit before the next attempt
+  fi
+done
 
 popd
