This is an automated email from the ASF dual-hosted git repository. aljoscha pushed a commit to branch release-1.8 in repository https://gitbox.apache.org/repos/asf/flink.git
commit 94415058a3e71ff53b7d3985fa038fa1c4e4aefa
Author: Aljoscha Krettek <[email protected]>
AuthorDate: Thu Aug 1 13:04:24 2019 +0200

    [FLINK-10368] Harden Dockerized Kerberos tests by waiting for NM to be up

    Before, we didn't wait for Yarn NodeManagers to be up. This meant that
    sometimes the Flink Job would not have enough resources to run.
---
 .../test-scripts/test_yarn_kerberos_docker.sh | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/flink-end-to-end-tests/test-scripts/test_yarn_kerberos_docker.sh b/flink-end-to-end-tests/test-scripts/test_yarn_kerberos_docker.sh
index 5f2dea2..528dfed 100755
--- a/flink-end-to-end-tests/test-scripts/test_yarn_kerberos_docker.sh
+++ b/flink-end-to-end-tests/test-scripts/test_yarn_kerberos_docker.sh
@@ -61,7 +61,7 @@ function start_hadoop_cluster() {
             return 1
         else
             echo "Waiting for hadoop cluster to come up. We have been trying for $time_diff seconds, retrying ..."
-            sleep 10
+            sleep 5
         fi
     done
 
@@ -74,6 +74,26 @@ function start_hadoop_cluster() {
         return 1
     fi
 
+    # try and see if NodeManagers are up, otherwise the Flink job will not have enough resources
+    # to run
+    nm_running="0"
+    start_time=$(date +%s)
+    while [ "$nm_running" -lt "2" ]; do
+        current_time=$(date +%s)
+        time_diff=$((current_time - start_time))
+
+        if [ $time_diff -ge $MAX_RETRY_SECONDS ]; then
+            return 1
+        else
+            echo "We only have $nm_running NodeManagers up. We have been trying for $time_diff seconds, retrying ..."
+            sleep 1
+        fi
+
+        docker exec -it master bash -c "kinit -kt /home/hadoop-user/hadoop-user.keytab hadoop-user"
+        nm_running=`docker exec -it master bash -c "yarn node -list" | grep RUNNING | wc -l`
+        docker exec -it master bash -c "kdestroy"
+    done
+
     return 0
 }
