GJL commented on a change in pull request #10538: [FLINK-15135][e2e][Mesos] 
Adding e2e tests for Flink's Mesos integration
URL: https://github.com/apache/flink/pull/10538#discussion_r357659106
 
 

 ##########
 File path: flink-end-to-end-tests/test-scripts/common_mesos_docker.sh
 ##########
 @@ -0,0 +1,115 @@
+#!/usr/bin/env bash
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+# A pipeline fails if any of its stages fails, not only the last one.
+set -o pipefail
+
+# Load the shared helpers that live next to this script.
+source "$(dirname "$0")/common.sh"
+
+# CLUSTER_SETUP_RETRIES is not read in the visible part of this file;
+# presumably the number of cluster start attempts -- confirm against callers.
+CLUSTER_SETUP_RETRIES=3
+# Upper bound, in seconds, for waiting on the Mesos master and slave.
+MAX_RETRY_SECONDS=120
+
+# Test data directory; cluster_shutdown removes its log/ and tmp/ children.
+# Exported, presumably for docker-compose.yml -- confirm.
+export INPUT_VOLUME="${END_TO_END_DIR}/test-scripts/test-data"
+
+# Print the environment up front to make failed runs easier to diagnose.
+echo "End-to-end directory ${END_TO_END_DIR}"
+docker --version
+docker-compose --version
+
+# Seconds since the epoch at script start.
+start_time="$(date +%s)"
+
+# make sure we stop our cluster at the end
+function cluster_shutdown {
+  docker-compose -f 
$END_TO_END_DIR/test-scripts/docker-mesos-cluster/docker-compose.yml down
+  sudo rm -rf ${INPUT_VOLUME}/log
+  sudo rm -rf ${INPUT_VOLUME}/tmp
+}
+on_exit cluster_shutdown
+
+# Polls the Flink REST API from inside the mesos-master container until the
+# /taskmanagers endpoint answers with a "taskmanagers" array.
+# Outputs: progress messages on stdout
+# Returns: 0 once the endpoint answers; exits the whole script with status 1
+#          after TIMEOUT unsuccessful attempts
+function wait_flink_running_on_mesos {
+  # try TIMEOUT times, one second apart, until the dispatcher is up
+  local QUERY_URL="http://mesos-master:8081/taskmanagers"
+  local TIMEOUT=20
+  local QUERY_RESULT
+  local i
+  for i in $(seq 1 "${TIMEOUT}"); do
+    # No -it here: allocating a TTY fails when the script runs without a
+    # terminal (e.g. on CI), and that failure would be hidden by the
+    # "2> /dev/null || true" below, so the loop would always time out.
+    # without the || true this would exit our script if the JobManager is not yet up
+    QUERY_RESULT=$(docker exec mesos-master curl "$QUERY_URL" 2> /dev/null || true)
+
+    # ensure the taskmanagers field is there at all; the list itself may
+    # still be empty, which is enough to know the endpoint is serving
+    if [[ ${QUERY_RESULT} =~ \{\"taskmanagers\":\[.*\]\} ]]; then
+      echo "Dispatcher REST endpoint is up."
+      return
+    fi
+
+    echo "Waiting for dispatcher REST endpoint to come up..."
+    sleep 1
+  done
+  echo "Dispatcher REST endpoint has not started within a timeout of ${TIMEOUT} sec"
+  exit 1
+}
+
+# Returns 0 if the combined stdout/stderr log of container $1 contains the
+# fixed string $2. The log is captured first instead of piping into grep -q:
+# under "set -o pipefail" an early-exiting grep can make the pipeline report
+# failure even though the text was found.
+function _mesos_container_log_contains() {
+    local logs
+    logs=$(docker logs "$1" 2>&1) || true
+    grep -qF -- "$2" <<< "${logs}"
+}
+
+# Builds the image, starts the docker-compose Mesos cluster, waits until both
+# the master and the slave report readiness in their logs, verifies that both
+# containers are running, points the Flink configuration at mesos-master and
+# launches the Mesos application master.
+# Globals: END_TO_END_DIR, MAX_RETRY_SECONDS, FLINK_DIR (read);
+#          start_time (written)
+# Returns: 0 on success; 1 if the cluster is not ready within
+#          MAX_RETRY_SECONDS or one of the containers is not running
+function start_flink_cluster_with_mesos() {
+    local current_time time_diff container
+
+    echo "Starting Flink on Mesos cluster"
+    build_image
+
+    docker-compose -f "${END_TO_END_DIR}/test-scripts/docker-mesos-cluster/docker-compose.yml" up -d
+
+    # wait for the Mesos master and slave set up
+    # One loop that requires BOTH conditions, so the slave is always checked
+    # and every unsuccessful pass goes through the timeout check and the sleep.
+    start_time=$(date +%s)
+    until _mesos_container_log_contains mesos-master "Successfully recovered registrar" &&
+          _mesos_container_log_contains mesos-slave "Detecting new master"
+    do
+        current_time=$(date +%s)
+        time_diff=$((current_time - start_time))
+
+        if (( time_diff >= MAX_RETRY_SECONDS )); then
+            return 1
+        fi
+
+        echo "Waiting for mesos cluster to come up. We have been trying for $time_diff seconds, retrying ..."
+        sleep 5
+    done
+
+    # perform health checks
+    # The substitution is quoted so a multi-word error message from
+    # "docker inspect" compares as one (non-matching) string.
+    for container in mesos-master mesos-slave; do
+        if [[ "$(docker inspect -f '{{.State.Running}}' "${container}" 2>&1)" != 'true' ]]; then
+            return 1
+        fi
+    done
+
+    set_config_key "jobmanager.rpc.address" "mesos-master"
+    set_config_key "rest.address" "mesos-master"
+
+    # NOTE(review): -it requires a TTY and may fail on CI; a detached exec
+    # (docker exec -d) looks like the better fit -- confirm before changing.
+    docker exec -it mesos-master nohup bash -c "${FLINK_DIR}/bin/mesos-appmaster.sh -Dmesos.master=mesos-master:5050 &"
+    wait_flink_running_on_mesos
+
+    return 0
+}
+
+function build_image() {
+    echo "Building Mesos Docker container"
+    until docker build -f 
$END_TO_END_DIR/test-scripts/docker-mesos-cluster/Dockerfile \
 
 Review comment:
   Why don't we use `retry_times` from `common.sh`?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to