This is an automated email from the ASF dual-hosted git repository.

aljoscha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 41e9f7831ae0eb5924cb21a4a2a7a64e9cb16bc6
Author: Aljoscha Krettek <[email protected]>
AuthorDate: Thu Aug 1 13:04:24 2019 +0200

    [FLINK-10368] Harden Dockerized Kerberos tests by waiting for NM to be up

    Before, we didn't wait for Yarn NodeManagers to be up. This meant that
    sometimes the Flink Job would not have enough resources to run.
---
 .../test-scripts/test_yarn_kerberos_docker.sh | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/flink-end-to-end-tests/test-scripts/test_yarn_kerberos_docker.sh b/flink-end-to-end-tests/test-scripts/test_yarn_kerberos_docker.sh
index 6f66236..6a170b5 100755
--- a/flink-end-to-end-tests/test-scripts/test_yarn_kerberos_docker.sh
+++ b/flink-end-to-end-tests/test-scripts/test_yarn_kerberos_docker.sh
@@ -75,7 +75,7 @@ function start_hadoop_cluster() {
             return 1
         else
             echo "Waiting for hadoop cluster to come up. We have been trying for $time_diff seconds, retrying ..."
-            sleep 10
+            sleep 5
         fi
     done
 
@@ -88,6 +88,26 @@ function start_hadoop_cluster() {
         return 1
     fi
 
+    # try and see if NodeManagers are up, otherwise the Flink job will not have enough resources
+    # to run
+    nm_running="0"
+    start_time=$(date +%s)
+    while [ "$nm_running" -lt "2" ]; do
+        current_time=$(date +%s)
+        time_diff=$((current_time - start_time))
+
+        if [ $time_diff -ge $MAX_RETRY_SECONDS ]; then
+            return 1
+        else
+            echo "We only have $nm_running NodeManagers up. We have been trying for $time_diff seconds, retrying ..."
+            sleep 1
+        fi
+
+        docker exec -it master bash -c "kinit -kt /home/hadoop-user/hadoop-user.keytab hadoop-user"
+        nm_running=`docker exec -it master bash -c "yarn node -list" | grep RUNNING | wc -l`
+        docker exec -it master bash -c "kdestroy"
+    done
+
     return 0
 }
 echo "Building Hadoop Docker container"
