This is an automated email from the ASF dual-hosted git repository.
lausen pushed a commit to branch v1.x
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/v1.x by this push:
new f6f4a5f [v1.x] CI fixes to make more stable and upgradable (#19895)
f6f4a5f is described below
commit f6f4a5fd51c5f6442d2ef69228b08a70346812d6
Author: Joe Evans <[email protected]>
AuthorDate: Tue Feb 16 08:41:52 2021 -0800
[v1.x] CI fixes to make more stable and upgradable (#19895)
* Test moving pipelines from p3 to g4.
* Remove the fallback codecov command: the existing (first) command works, and the
second always fails a few times before finally succeeding (it also doesn't
support the -P parameter, which causes an error).
* Stop using the docker Python client, since it still doesn't support the nvidia
'gpus' attribute. Switch to subprocess calls with a list argument
(to avoid shell injection).
See https://github.com/docker/docker-py/issues/2395
* Remove old files.
* Fix comment
* Set default environment variables
* Fix GPU syntax.
* Use subprocess.run, redirect output to stdout, and don't run docker in
interactive mode.
* Check if codecov works without providing parameters now.
* Send docker stderr to sys.stderr
* Support both nvidia-docker configurations: first try '--gpus all', and if
that fails, fall back to '--runtime nvidia' (see the sketch after this list).
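For reference, the new container invocation in ci/build.py boils down to the pattern
below. This is a condensed sketch, not the full container_run implementation:
run_in_docker, docker_args, and use_gpu are placeholder names, and the argument list
in the usage comment is hypothetical.

    import subprocess
    import sys

    def run_in_docker(docker_args, use_gpu):
        # Build the docker command as an argument list and run it without a shell,
        # so user-supplied values cannot be used for shell injection.
        def docker_run(cmd):
            # Stream container output to our stdout/stderr; raise on non-zero exit.
            subprocess.run(cmd, stdout=sys.stdout, stderr=sys.stderr, check=True)

        if not use_gpu:
            docker_run(['docker', 'run'] + docker_args)
            return
        try:
            # Newer setups: the docker CLI accepts '--gpus all' directly.
            docker_run(['docker', 'run', '--gpus', 'all'] + docker_args)
        except subprocess.CalledProcessError as e:
            # Exit code 125 means the docker CLI rejected the flag; fall back
            # to the legacy nvidia runtime.
            if e.returncode == 125:
                docker_run(['docker', 'run', '--runtime', 'nvidia'] + docker_args)
            else:
                raise

    # Hypothetical usage (placeholder image and command):
    # run_in_docker(['--rm', 'ubuntu:18.04', 'nvidia-smi'], use_gpu=True)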
Co-authored-by: Joe Evans <[email protected]>
---
ci/Jenkinsfile_utils.groovy | 15 +-
ci/build.py | 85 +++-----
ci/jenkins/Jenkins_steps.groovy | 6 +-
ci/safe_docker_run.py | 253 ------------------------
ci/test_safe_docker_run.py | 427 ----------------------------------------
5 files changed, 34 insertions(+), 752 deletions(-)
diff --git a/ci/Jenkinsfile_utils.groovy b/ci/Jenkinsfile_utils.groovy
index e4135cc..523fad9 100644
--- a/ci/Jenkinsfile_utils.groovy
+++ b/ci/Jenkinsfile_utils.groovy
@@ -112,20 +112,7 @@ def get_git_commit_hash() {
}
def publish_test_coverage() {
- // CodeCovs auto detection has trouble with our CIs PR validation due the merging strategy
- git_commit_hash = get_git_commit_hash()
-
- if (env.CHANGE_ID) {
- // PR execution
- codecovArgs = "-B ${env.CHANGE_TARGET} -C ${git_commit_hash} -P
${env.CHANGE_ID}"
- } else {
- // Branch execution
- codecovArgs = "-B ${env.BRANCH_NAME} -C ${git_commit_hash}"
- }
-
- // To make sure we never fail because test coverage reporting is not available
- // Fall back to our own copy of the bash helper if it failed to download the public version
- sh "(curl --retry 10 -s https://codecov.io/bash | bash -s - ${codecovArgs}) || (curl --retry 10 -s https://s3-us-west-2.amazonaws.com/mxnet-ci-prod-slave-data/codecov-bash.txt | bash -s - ${codecovArgs}) || true"
+ sh "curl -s https://codecov.io/bash | bash"
}
def collect_test_results_unix(original_file_name, new_file_name) {
diff --git a/ci/build.py b/ci/build.py
index 1f7217a..645eb96 100755
--- a/ci/build.py
+++ b/ci/build.py
@@ -38,7 +38,6 @@ from itertools import chain
from subprocess import check_call, check_output
from typing import *
-from safe_docker_run import SafeDockerClient
from util import *
@@ -187,8 +186,7 @@ def default_ccache_dir() -> str:
return os.path.join(os.path.expanduser("~"), ".ccache")
-def container_run(docker_client: SafeDockerClient,
- platform: str,
+def container_run(platform: str,
nvidia_runtime: bool,
docker_registry: str,
shared_memory_size: str,
@@ -197,17 +195,12 @@ def container_run(docker_client: SafeDockerClient,
environment: Dict[str, str],
dry_run: bool = False) -> int:
"""Run command in a container"""
- container_wait_s = 600
- #
- # Environment setup
- #
+ # set default environment variables
environment.update({
'CCACHE_MAXSIZE': '500G',
'CCACHE_TEMPDIR': '/tmp/ccache', # temp dir should be local and not shared
- 'CCACHE_DIR': '/work/ccache', # this path is inside the container as /work/ccache is
- # mounted
- 'CCACHE_LOGFILE': '/tmp/ccache.log', # a container-scoped log, useful for ccache
- # verification.
+ 'CCACHE_DIR': '/work/ccache', # this path is inside the container as /work/ccache is mounted
+ 'CCACHE_LOGFILE': '/tmp/ccache.log', # a container-scoped log, useful for ccache verification.
})
environment.update({k: os.environ[k] for k in ['CCACHE_MAXSIZE'] if k in os.environ})
@@ -219,13 +212,9 @@ def container_run(docker_client: SafeDockerClient,
os.makedirs(local_ccache_dir, exist_ok=True)
logging.info("Using ccache directory: %s", local_ccache_dir)
- # Equivalent command
- docker_cmd_list = [
- "docker",
- 'run',
- "--gpus all" if nvidia_runtime else "",
- "--cap-add",
- "SYS_PTRACE", # Required by ASAN
+ # Build docker command
+ docker_arg_list = [
+ "--cap-add", "SYS_PTRACE", # Required by ASAN
'--rm',
'--shm-size={}'.format(shared_memory_size),
# mount mxnet root
@@ -241,40 +230,27 @@ def container_run(docker_client: SafeDockerClient,
'-e', "CCACHE_DIR={}".format(environment['CCACHE_DIR']),
# a container-scoped log, useful for ccache verification.
'-e', "CCACHE_LOGFILE={}".format(environment['CCACHE_LOGFILE']),
- '-ti',
- tag]
- docker_cmd_list.extend(command)
- docker_cmd = ' \\\n\t'.join(docker_cmd_list)
- logging.info("Running %s in container %s", command, tag)
- logging.info("Executing the equivalent of:\n%s\n", docker_cmd)
+ ]
+ docker_arg_list += [tag]
+ docker_arg_list.extend(command)
+
+ def docker_run_cmd(cmd):
+ logging.info("Running %s in container %s", command, tag)
+ logging.info("Executing command:\n%s\n", ' \\\n\t'.join(cmd))
+ subprocess.run(cmd, stdout=sys.stdout, stderr=sys.stderr, check=True)
if not dry_run:
- #############################
- #
- signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGINT, signal.SIGTERM})
- # noinspection PyShadowingNames
- runtime = None
- if nvidia_runtime:
- # noinspection PyShadowingNames
- # runc is default (docker info | grep -i runtime)
- runtime = 'nvidia'
-
- return docker_client.run(
- tag,
- runtime=runtime,
- command=command,
- shm_size=shared_memory_size,
- user='{}:{}'.format(os.getuid(), os.getgid()),
- cap_add='SYS_PTRACE',
- volumes={
- mx_root:
- {'bind': '/work/mxnet', 'mode': 'rw'},
- local_build_folder:
- {'bind': '/work/build', 'mode': 'rw'},
- local_ccache_dir:
- {'bind': '/work/ccache', 'mode': 'rw'},
- },
- environment=environment)
+ if not nvidia_runtime:
+ docker_run_cmd(['docker', 'run'] + docker_arg_list)
+ else:
+ try:
+ docker_run_cmd(['docker', 'run', '--gpus', 'all'] + docker_arg_list)
+ except subprocess.CalledProcessError as e:
+ if e.returncode == 125:
+ docker_run_cmd(['docker', 'run', '--runtime', 'nvidia'] + docker_arg_list)
+ else:
+ raise
+
return 0
@@ -378,7 +354,6 @@ def main() -> int:
args = parser.parse_args()
command = list(chain(*args.command))
- docker_client = SafeDockerClient()
environment = dict([(e.split('=')[:2] if '=' in e else (e, os.environ[e])) for e in args.environment])
@@ -405,13 +380,13 @@ def main() -> int:
ret = 0
if command:
ret = container_run(
- docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
+ platform=platform, nvidia_runtime=args.nvidiadocker,
shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
local_ccache_dir=args.ccache_dir, environment=environment)
elif args.print_docker_run:
command = []
ret = container_run(
- docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
+ platform=platform, nvidia_runtime=args.nvidiadocker,
shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
local_ccache_dir=args.ccache_dir, dry_run=True, environment=environment)
else:
@@ -419,7 +394,7 @@ def main() -> int:
command = ["/work/mxnet/ci/docker/runtime_functions.sh",
"build_{}".format(platform)]
logging.info("No command specified, trying default build: %s", '
'.join(command))
ret = container_run(
- docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
+ platform=platform, nvidia_runtime=args.nvidiadocker,
shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
local_ccache_dir=args.ccache_dir, environment=environment)
@@ -449,7 +424,7 @@ def main() -> int:
continue
command = ["/work/mxnet/ci/docker/runtime_functions.sh",
build_platform]
container_run(
- docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
+ platform=platform, nvidia_runtime=args.nvidiadocker,
shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
local_ccache_dir=args.ccache_dir, environment=environment)
shutil.move(buildir(), plat_buildir)
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index c985a18..da6a74a 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -769,7 +769,7 @@ def test_unix_python3_gpu_cu110() {
def test_unix_python3_quantize_gpu() {
return ['Python3: Quantize GPU': {
- node(NODE_LINUX_GPU_P3) {
+ node(NODE_LINUX_GPU_G4) {
ws('workspace/ut-python3-quantize-gpu') {
timeout(time: max_time, unit: 'MINUTES') {
try {
@@ -787,7 +787,7 @@ def test_unix_python3_quantize_gpu() {
def test_unix_python3_quantize_gpu_cu110() {
return ['Python3+CUDA11.0: Quantize GPU': {
- node(NODE_LINUX_GPU_P3) {
+ node(NODE_LINUX_GPU_G4) {
ws('workspace/ut-python3-quantize-gpu') {
timeout(time: max_time, unit: 'MINUTES') {
try {
@@ -903,7 +903,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu() {
def test_unix_python3_tensorrt_gpu() {
return ['Python3: TensorRT GPU': {
- node(NODE_LINUX_GPU_P3) {
+ node(NODE_LINUX_GPU_G4) {
ws('workspace/build-tensorrt') {
timeout(time: max_time, unit: 'MINUTES') {
try {
diff --git a/ci/safe_docker_run.py b/ci/safe_docker_run.py
deleted file mode 100755
index 9c90c2a..0000000
--- a/ci/safe_docker_run.py
+++ /dev/null
@@ -1,253 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Docker command wrapper to guard against Zombie containers
-"""
-
-import argparse
-import atexit
-import logging
-import os
-import random
-import signal
-import sys
-import time
-from functools import reduce
-from itertools import chain
-from typing import Dict, Any
-
-import docker
-from docker.errors import NotFound
-from docker.models.containers import Container
-
-from util import config_logging
-
-DOCKER_STOP_TIMEOUT_SECONDS = 3
-CONTAINER_WAIT_SECONDS = 600
-
-
-class SafeDockerClient:
- """
- A wrapper around the docker client to ensure that no zombie containers are left hanging around
- in case the script is not allowed to finish normally
- """
-
- @staticmethod
- def _trim_container_id(cid):
- """:return: trimmed container id"""
- return cid[:12]
-
- def __init__(self):
- self._docker_client = docker.from_env()
- self._containers = set()
- self._docker_stop_timeout = DOCKER_STOP_TIMEOUT_SECONDS
- self._container_wait_seconds = CONTAINER_WAIT_SECONDS
-
- def signal_handler(signum, _):
- signal.pthread_sigmask(signal.SIG_BLOCK, {signum})
- logging.warning("Signal %d received, cleaning up...", signum)
- self._clean_up()
- logging.warning("done. Exiting with error.")
- sys.exit(1)
-
- atexit.register(self._clean_up)
- signal.signal(signal.SIGTERM, signal_handler)
- signal.signal(signal.SIGINT, signal_handler)
-
- def _clean_up(self):
- if self._containers:
- logging.warning("Cleaning up containers")
- else:
- return
- # noinspection PyBroadException
- try:
- stop_timeout = int(os.environ.get("DOCKER_STOP_TIMEOUT", self._docker_stop_timeout))
- except Exception:
- stop_timeout = 3
- for container in self._containers:
- try:
- container.stop(timeout=stop_timeout)
- logging.info("☠: stopped container %s",
self._trim_container_id(container.id))
- container.remove()
- logging.info("🚽: removed container %s",
self._trim_container_id(container.id))
- except Exception as e:
- logging.exception(e)
- self._containers.clear()
- logging.info("Cleaning up containers finished.")
-
- def _add_container(self, container: Container) -> Container:
- self._containers.add(container)
- return container
-
- def _remove_container(self, container: Container):
- self._containers.remove(container)
-
- def run(self, *args, **kwargs) -> int:
- if "detach" in kwargs and kwargs.get("detach") is False:
- raise ValueError("Can only safe run with 'detach' set to True")
- else:
- kwargs["detach"] = True
-
- # These variables are passed to the container so the process tree killer can find runaway
- # process inside the container
- # https://wiki.jenkins.io/display/JENKINS/ProcessTreeKiller
- # https://github.com/jenkinsci/jenkins/blob/578d6bacb33a5e99f149de504c80275796f0b231/core/src/main/java/hudson/model/Run.java#L2393
- if "environment" not in kwargs:
- kwargs["environment"] = {}
-
- jenkins_env_vars = ["BUILD_NUMBER", "BUILD_ID", "BUILD_TAG"]
- kwargs["environment"].update({k: os.environ[k] for k in
jenkins_env_vars if k in os.environ})
-
- ret = 0
- try:
- # Race condition:
- # add a random sleep to (a) give docker time to flush disk buffer after pulling image
- # and (b) minimize race conditions between jenkins runs on same host
- time.sleep(random.randint(2,10))
- # If the call to docker_client.containers.run is interrupted, it is possible that
- # the container won't be cleaned up. We avoid this by temporarily masking the signals.
- signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGINT, signal.SIGTERM})
- container = self._add_container(self._docker_client.containers.run(*args, **kwargs))
- signal.pthread_sigmask(signal.SIG_UNBLOCK, {signal.SIGINT, signal.SIGTERM})
- logging.info("Started container: %s", self._trim_container_id(container.id))
- stream = container.logs(stream=True, stdout=True, stderr=True)
- sys.stdout.flush()
- for chunk in stream:
- sys.stdout.buffer.write(chunk)
- sys.stdout.buffer.flush()
- sys.stdout.flush()
- stream.close()
-
- try:
- logging.info("Waiting for status of container %s for %d s.",
- self._trim_container_id(container.id),
- self._container_wait_seconds)
- wait_result = container.wait(timeout=self._container_wait_seconds)
- logging.info("Container exit status: %s", wait_result)
- ret = wait_result.get('StatusCode', 200)
- if ret != 0:
- logging.error("Container exited with an error 😞")
- logging.info("Executed command for reproduction:\n\n%s\n",
" ".join(sys.argv))
- else:
- logging.info("Container exited with success 👍")
- logging.info("Executed command for reproduction:\n\n%s\n",
" ".join(sys.argv))
- except Exception as err:
- logging.exception(err)
- return 150
-
- try:
- logging.info("Stopping container: %s",
self._trim_container_id(container.id))
- container.stop()
- except Exception as e:
- logging.exception(e)
- ret = 151
-
- try:
- logging.info("Removing container: %s",
self._trim_container_id(container.id))
- container.remove()
- except Exception as e:
- logging.exception(e)
- ret = 152
- self._remove_container(container)
- containers = self._docker_client.containers.list()
- if containers:
- logging.info("Other running containers: %s",
[self._trim_container_id(x.id) for x in containers])
- except NotFound as e:
- logging.info("Container was stopped before cleanup started: %s", e)
-
- return ret
-
-
-def _volume_mount(volume_dfn: str) -> Dict[str, Any]:
- """
- Converts docker volume mount format, e.g. docker run --volume /local/path:/container/path:ro
- to an object understood by the python docker library, e.g. {"local/path": {"bind": "/container/path", "mode": "ro"}}
- This is used by the argparser for automatic conversion and input validation.
- If the mode is not specified, 'rw' is assumed.
- :param volume_dfn: A string to convert to a volume mount object in the format <local path>:<container path>[:ro|rw]
- :return: An object in the form {"<local path>" : {"bind": "<container path>", "mode": "rw|ro"}}
- """
- if volume_dfn is None:
- raise argparse.ArgumentTypeError("Missing value for volume definition")
-
- parts = volume_dfn.split(":")
-
- if len(parts) < 2 or len(parts) > 3:
- raise argparse.ArgumentTypeError("Invalid volume definition
{}".format(volume_dfn))
-
- mode = "rw"
- if len(parts) == 3:
- mode = parts[2]
-
- if mode not in ["rw", "ro"]:
- raise argparse.ArgumentTypeError("Invalid volume mount mode {} in
volume definition {}".format(mode, volume_dfn))
-
- return {parts[0]: {"bind": parts[1], "mode": mode}}
-
-
-def main(command_line_arguments):
- config_logging()
-
- parser = argparse.ArgumentParser(
- description="""Wrapper around docker run that protects against Zombie
containers""", epilog="")
-
- parser.add_argument("-u", "--user",
- help="Username or UID (format:
<name|uid>[:<group|gid>])",
- default=None)
-
- parser.add_argument("-v", "--volume",
- action='append',
- type=_volume_mount,
- help="Bind mount a volume",
- default=[])
-
- parser.add_argument("--cap-add",
- help="Add Linux capabilities",
- action="append",
- type=str,
- default=[])
-
- parser.add_argument("--runtime",
- help="Runtime to use for this container",
- default=None)
-
- parser.add_argument("--name",
- help="Assign a name to the container",
- default=None)
-
- parser.add_argument("image", metavar="IMAGE")
- parser.add_argument("command", metavar="COMMAND")
- parser.add_argument("args", nargs='*', metavar="ARG")
-
- args = parser.parse_args(args=command_line_arguments)
- docker_client = SafeDockerClient()
- return docker_client.run(args.image, **{
- "command": " ".join(list(chain([args.command] + args.args))),
- "user": args.user,
- "runtime": args.runtime,
- "name": args.name,
- "volumes": reduce(lambda dct, v: {**dct, **v}, args.volume, {}),
- "cap_add": args.cap_add
- })
-
-
-if __name__ == "__main__":
- exit(main(sys.argv[1:]))
diff --git a/ci/test_safe_docker_run.py b/ci/test_safe_docker_run.py
deleted file mode 100644
index 433d42e..0000000
--- a/ci/test_safe_docker_run.py
+++ /dev/null
@@ -1,427 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Safe docker run tests
-"""
-import itertools
-import os
-import signal
-import unittest
-from typing import Optional
-from unittest.mock import create_autospec, patch, call
-
-from docker import DockerClient
-from docker.models.containers import Container, ContainerCollection
-
-from safe_docker_run import SafeDockerClient, main
-
-
-def create_mock_container(status_code: int = 0):
- """
- Creates a mock docker container that exits with the specified status code
- """
- mock_container = create_autospec(Container, name="mock_container")
- mock_container.wait.return_value = {
- "StatusCode": status_code
- }
- return mock_container
-
-
-def create_mock_container_collection(container: Container):
- """
- Creates a mock ContainerCollection that return the supplied container when the 'run' method is called
- """
- mock_container_collection = create_autospec(ContainerCollection, name="mock_collection")
- mock_container_collection.run.return_value = container
- return mock_container_collection
-
-
-class MockDockerClient:
- """
- A mock DockerClient when docker.from_env is called
- The supplied container will be returned when the client.containers.run method is called
- """
- def __init__(self, container: Container):
- self._mock_client = create_autospec(DockerClient, name="mock_client")
- self._mock_client.containers = create_mock_container_collection(container)
- self._patch = patch("docker.from_env", return_value=self._mock_client)
-
- def __enter__(self):
- self._patch.start()
- return self._mock_client
-
- def __exit__(self, _, __, ___):
- self._patch.stop()
-
-
-class TestSafeDockerRun(unittest.TestCase):
-
- @patch("safe_docker_run.signal.pthread_sigmask")
- @patch.dict(os.environ, {
- "BUILD_NUMBER": "BUILD_NUMBER_5",
- "BUILD_ID": "BUILD_ID_1",
- "BUILD_TAG": "BUILD_TAG_7"
- })
- def test_run_successful(self, mock_pthread_sigmask):
- """
- Tests successful run
- """
- mock_container = create_mock_container()
-
- with MockDockerClient(mock_container) as mock_client:
- safe_docker = SafeDockerClient()
-
- # Check return code is 0
- assert safe_docker.run("image", "command") == 0
-
- # Check call to container is correct
- assert mock_client.containers.run.call_args_list == [
- call("image", "command", detach=True, environment={
- "BUILD_NUMBER": "BUILD_NUMBER_5",
- "BUILD_ID": "BUILD_ID_1",
- "BUILD_TAG": "BUILD_TAG_7"
- })
- ]
-
- # Check correct signals are blocked then unblocked
- assert mock_pthread_sigmask.call_args_list == [
- call(signal.SIG_BLOCK, {signal.SIGINT, signal.SIGTERM}),
- call(signal.SIG_UNBLOCK, {signal.SIGINT, signal.SIGTERM})
- ]
-
- # Assert container is stopped and removed
- assert mock_container.stop.call_count == 1
- assert mock_container.remove.call_count == 1
- assert len(safe_docker._containers) == 0
-
- def test_run_detach(self):
- """
- Tests detach=True is passed to the underlying call by default
- """
- mock_container = create_mock_container()
-
- # Test detach=True is passed in even if not specified
- with MockDockerClient(mock_container) as mock_client:
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command") == 0
- assert mock_client.containers.run.call_count == 1
- _, kwargs = mock_client.containers.run.call_args
- assert kwargs["detach"] is True
-
- # Test passing in detach=True does not cause any issues
- with MockDockerClient(mock_container) as mock_client:
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command", detach=True) == 0
- assert mock_client.containers.run.call_count == 1
- _, kwargs = mock_client.containers.run.call_args
- assert kwargs["detach"] is True
-
- # Test detach=False fails
- with MockDockerClient(mock_container) as mock_client:
- safe_docker = SafeDockerClient()
- with self.assertRaises(ValueError):
- safe_docker.run("image", "command", detach=False)
- assert mock_client.containers.run.call_args_list == []
-
- def test_jenkins_vars(self):
- """
- Tests jenkins environment variables are appropriately passed to the underlying docker run call
- """
- # NOTE: It's important that these variables are passed to the underlying docker container
- # These variables are passed to the container so the process tree killer can find runaway
- # process inside the container
- # https://wiki.jenkins.io/display/JENKINS/ProcessTreeKiller
- # https://github.com/jenkinsci/jenkins/blob/578d6bacb33a5e99f149de504c80275796f0b231/core/src/main/java/hudson/model/Run.java#L2393
-
- jenkins_vars = {
- "BUILD_NUMBER": "BUILD_NUMBER_5",
- "BUILD_ID": "BUILD_ID_1",
- "BUILD_TAG": "BUILD_TAG_7"
- }
- mock_container = create_mock_container()
-
- # Test environment is empty if the jenkins vars are not present
- with MockDockerClient(mock_container) as mock_client:
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command") == 0
- assert mock_client.containers.run.call_count == 1
- _, kwargs = mock_client.containers.run.call_args
- assert kwargs["environment"] == {}
-
- # Test environment contains jenkins env vars if they are present
- with MockDockerClient(mock_container) as mock_client:
- with patch.dict(os.environ, jenkins_vars):
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command") == 0
- assert mock_client.containers.run.call_count == 1
- _, kwargs = mock_client.containers.run.call_args
- assert kwargs["environment"] == jenkins_vars
-
- # Test jenkins env vars are added to callers env vars
- user_env = {"key1": "value1", "key2": "value2"}
- with MockDockerClient(mock_container) as mock_client:
- with patch.dict(os.environ, jenkins_vars):
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command",
environment=user_env) == 0
- assert mock_client.containers.run.call_count == 1
- _, kwargs = mock_client.containers.run.call_args
- assert kwargs["environment"] == {**jenkins_vars, **user_env}
-
- def test_run_args_kwargs_passed(self):
- """
- Tests args and kwargs are passed to the container run call
- """
- mock_container = create_mock_container()
-
- # Test detach=True is passed in even if not specified
- with MockDockerClient(mock_container) as mock_client:
- safe_docker = SafeDockerClient()
- assert safe_docker.run(
- "image",
- "command",
- "another_arg",
- str_param="value",
- bool_param=True,
- none_param=None,
- int_param=5,
- float_param=5.2,
- list_param=["this", "is", "a", "list"],
- map_param={
- "a": "5",
- "b": True,
- "c": 2
- }) == 0
- assert mock_client.containers.run.call_args_list == [
- call(
- "image",
- "command",
- "another_arg",
- detach=True,
- environment={},
- str_param="value",
- bool_param=True,
- none_param=None,
- int_param=5,
- float_param=5.2,
- list_param=["this", "is", "a", "list"],
- map_param={
- "a": "5",
- "b": True,
- "c": 2
- }
- )
- ]
-
- def test_container_returns_non_zero_status_code(self):
- """
- Tests non-zero code from container is returned and the container
- is cleaned up
- """
- mock_container = create_mock_container(status_code=10)
- with MockDockerClient(mock_container):
- safe_docker = SafeDockerClient()
- # check return code and that container gets cleaned up
- assert safe_docker.run("image", "command") == 10
- assert mock_container.stop.call_count == 1
- assert mock_container.remove.call_count == 1
- assert len(safe_docker._containers) == 0
-
- def test_container_wait_raises_returns_150(self):
- """
- Tests 150 is returned if an error is raised when calling container.wait
- """
- mock_container = create_mock_container()
- mock_container.wait.side_effect = RuntimeError("Something bad happened")
- with MockDockerClient(mock_container):
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command") == 150
-
- def test_container_stop_raises_returns_151(self):
- """
- Tests 151 is returned if an error is raised when calling container.stop
- """
- mock_container = create_mock_container()
- mock_container.stop.side_effect = RuntimeError("Something bad happened")
- with MockDockerClient(mock_container):
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command") == 151
-
- def test_container_remove_raises_returns_152(self):
- """
- Tests 152 is returned if an error is raised when calling container.remove
- """
- mock_container = create_mock_container()
- mock_container.remove.side_effect = RuntimeError("Something bad happened")
- with MockDockerClient(mock_container):
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command") == 152
-
- def test_main(self):
- """
- Tests main function against different command line arguments
- """
- tests = [
- # ( supplied command line arguments, expected call )
- (
- ["image", "command"],
- call("image", command="command", runtime=None, user=None,
name=None, volumes={}, cap_add=[])
- ),
- (
- ["image", "command", "arg1", "arg2"],
- call("image", command="command arg1 arg2", runtime=None,
user=None, name=None, volumes={}, cap_add=[])
- ),
- (
- ["--runtime", "nvidia", "image", "command"],
- call("image", command="command", runtime="nvidia", user=None,
name=None, volumes={}, cap_add=[])
- ),
- (
- ["--user", "1001:1001", "image", "command"],
- call("image", command="command", runtime=None,
user="1001:1001", name=None, volumes={}, cap_add=[])
- ),
- ([
- "--volume", "/local/path1:/container/path1",
- "--volume", "/local/path2:/container/path2:ro",
- "image",
- "command"
- ], call("image", command="command", runtime=None, user=None,
name=None, volumes={
- "/local/path1": {
- "bind": "/container/path1",
- "mode": "rw"
- },
- "/local/path2": {
- "bind": "/container/path2",
- "mode": "ro"
- }
- }, cap_add=[])),
- ([
- "--runtime", "nvidia",
- "-u", "1001:1001",
- "-v", "/local/path1:/container/path1",
- "-v", "/local/path2:/container/path2:ro",
- "--cap-add", "bob",
- "--cap-add", "jimmy",
- "--name",
- "container_name",
- "image",
- "command",
- "arg1",
- "arg2"
- ], call(
- "image",
- command="command arg1 arg2",
- runtime="nvidia",
- user="1001:1001",
- name="container_name",
- volumes={
- "/local/path1": {
- "bind": "/container/path1",
- "mode": "rw"
- },
- "/local/path2": {
- "bind": "/container/path2",
- "mode": "ro"
- }
- }, cap_add=["bob", "jimmy"])
- )
- ]
-
- # Tests valid arguments
- mock_docker = create_autospec(SafeDockerClient)
- mock_docker.run.return_value = 0
- with patch("safe_docker_run.SafeDockerClient",
return_value=mock_docker):
- for test in tests:
- arguments, expected_call = test
- main(arguments)
- assert mock_docker.run.call_args == expected_call
-
- # Tests invalid arguments
- tests = [
- [],
- None,
- ["image"],
- # Test some bad volume mounts
- ["-v", "bob", "image", "args"],
- ["-v", "/local/path", "image", "args"],
- ["-v", "/local/path:/container/path:blah", "image", "args"],
- ["-v", "", "image", "args"],
- ["-v", "a:b:c:d", "image", "args"]
- ]
-
- mock_docker = create_autospec(SafeDockerClient)
- with patch("safe_docker_run.SafeDockerClient",
return_value=mock_docker):
- with self.assertRaises(SystemExit):
- for test in tests:
- main(test)
-
- def test_clean_up(self):
- """
- Tests container clean up in case of SIGTERM and SIGINT
- """
- import subprocess
- import time
- import docker.errors
-
- docker_client = docker.from_env()
- container_name = "safedockertestcontainer1234"
-
- def get_container(name: str) -> Optional[Container]:
- try:
- return docker_client.containers.get(name)
- except docker.errors.NotFound:
- return None
-
- def remove_container_if_exists(name: str):
- container = get_container(name)
- if container:
- container.stop()
- container.remove()
-
- def wait_for_container(name: str) -> bool:
- for _ in itertools.count(5):
- if get_container(name):
- return True
- time.sleep(1)
- return False
-
- # Clear any containers with container name
- remove_container_if_exists(container_name)
-
- # None => not signal is emitted - we should still finish with no containers at the end due
- # to the atexit
- for sig in [None, signal.SIGTERM, signal.SIGINT]:
- # Execute the safe docker run script in a different process
- proc = subprocess.Popen(['./safe_docker_run.py', "--name", container_name, "ubuntu:18.04", "sleep 10"])
- # NOTE: we need to wait for the container to come up as not all operating systems support blocking signals
- if wait_for_container(container_name) is False:
- raise RuntimeError("Test container did not come up")
-
- # Issue the signal and wait for the process to finish
- if sig:
- proc.send_signal(sig)
- proc.wait()
-
- # The container should no longer exist
- assert get_container(container_name) is None
-
-
-if __name__ == '__main__':
- import nose
- nose.main()