This is an automated email from the ASF dual-hosted git repository.
lausen pushed a commit to branch v1.x
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/v1.x by this push:
new f6f4a5f [v1.x] CI fixes to make more stable and upgradable (#19895)
f6f4a5f is described below
commit f6f4a5fd51c5f6442d2ef69228b08a70346812d6
Author: Joe Evans <[email protected]>
AuthorDate: Tue Feb 16 08:41:52 2021 -0800
[v1.x] CI fixes to make more stable and upgradable (#19895)
* Test moving pipelines from p3 to g4.
* Remove the fallback codecov command: the existing (first) command works, and the
second always fails a few times before finally succeeding (it also doesn't
support the -P parameter, which causes an error).
* Stop using the docker Python client, since it still doesn't support the nvidia
'gpus' attribute. Switch to subprocess calls with a list argument
(to avoid shell injection).
See https://github.com/docker/docker-py/issues/2395
* Remove old files.
* Fix comment
* Set default environment variables
* Fix GPU syntax.
* Use subprocess.run, redirect output to stdout, and don't run docker in
interactive mode.
* Check if codecov works without providing parameters now.
* Send docker stderr to sys.stderr
* Support both nvidia-docker configurations: first try '--gpus all', and if
that fails, fall back to '--runtime nvidia' (see the sketch after this list).
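For reference, the new container invocation in ci/build.py boils down to the pattern
below. This is a condensed sketch, not the full container_run implementation:
run_in_docker, docker_args, and use_gpu are placeholder names, and the argument list
in the usage comment is hypothetical.

    import subprocess
    import sys

    def run_in_docker(docker_args, use_gpu):
        # Build the docker command as an argument list and run it without a shell,
        # so user-supplied values cannot be used for shell injection.
        def docker_run(cmd):
            # Stream container output to our stdout/stderr; raise on non-zero exit.
            subprocess.run(cmd, stdout=sys.stdout, stderr=sys.stderr, check=True)

        if not use_gpu:
            docker_run(['docker', 'run'] + docker_args)
            return
        try:
            # Newer setups: the docker CLI accepts '--gpus all' directly.
            docker_run(['docker', 'run', '--gpus', 'all'] + docker_args)
        except subprocess.CalledProcessError as e:
            # Exit code 125 means the docker CLI rejected the flag; fall back
            # to the legacy nvidia runtime.
            if e.returncode == 125:
                docker_run(['docker', 'run', '--runtime', 'nvidia'] + docker_args)
            else:
                raise

    # Hypothetical usage (placeholder image and command):
    # run_in_docker(['--rm', 'ubuntu:18.04', 'nvidia-smi'], use_gpu=True)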
Co-authored-by: Joe Evans <[email protected]>
---
ci/Jenkinsfile_utils.groovy | 15 +-
ci/build.py | 85 +++-----
ci/jenkins/Jenkins_steps.groovy | 6 +-
ci/safe_docker_run.py | 253 ------------------------
ci/test_safe_docker_run.py | 427 ----------------------------------------
5 files changed, 34 insertions(+), 752 deletions(-)
diff --git a/ci/Jenkinsfile_utils.groovy b/ci/Jenkinsfile_utils.groovy
index e4135cc..523fad9 100644
--- a/ci/Jenkinsfile_utils.groovy
+++ b/ci/Jenkinsfile_utils.groovy
@@ -112,20 +112,7 @@ def get_git_commit_hash() {
}
def publish_test_coverage() {
- // CodeCovs auto detection has trouble with our CIs PR validation due the merging strategy
- git_commit_hash = get_git_commit_hash()
-
- if (env.CHANGE_ID) {
- // PR execution
- codecovArgs = "-B ${env.CHANGE_TARGET} -C ${git_commit_hash} -P
${env.CHANGE_ID}"
- } else {
- // Branch execution
- codecovArgs = "-B ${env.BRANCH_NAME} -C ${git_commit_hash}"
- }
-
- // To make sure we never fail because test coverage reporting is not available
- // Fall back to our own copy of the bash helper if it failed to download the public version
- sh "(curl --retry 10 -s https://codecov.io/bash | bash -s - ${codecovArgs}) || (curl --retry 10 -s https://s3-us-west-2.amazonaws.com/mxnet-ci-prod-slave-data/codecov-bash.txt | bash -s - ${codecovArgs}) || true"
+ sh "curl -s https://codecov.io/bash | bash"
}
def collect_test_results_unix(original_file_name, new_file_name) {
diff --git a/ci/build.py b/ci/build.py
index 1f7217a..645eb96 100755
--- a/ci/build.py
+++ b/ci/build.py
@@ -38,7 +38,6 @@ from itertools import chain
from subprocess import check_call, check_output
from typing import *
-from safe_docker_run import SafeDockerClient
from util import *
@@ -187,8 +186,7 @@ def default_ccache_dir() -> str:
return os.path.join(os.path.expanduser("~"), ".ccache")
-def container_run(docker_client: SafeDockerClient,
- platform: str,
+def container_run(platform: str,
nvidia_runtime: bool,
docker_registry: str,
shared_memory_size: str,
@@ -197,17 +195,12 @@ def container_run(docker_client: SafeDockerClient,
environment: Dict[str, str],
dry_run: bool = False) -> int:
"""Run command in a container"""
- container_wait_s = 600
- #
- # Environment setup
- #
+ # set default environment variables
environment.update({
'CCACHE_MAXSIZE': '500G',
'CCACHE_TEMPDIR': '/tmp/ccache', # temp dir should be local and not shared
- 'CCACHE_DIR': '/work/ccache', # this path is inside the container as /work/ccache is
- # mounted
- 'CCACHE_LOGFILE': '/tmp/ccache.log', # a container-scoped log, useful for ccache
- # verification.
+ 'CCACHE_DIR': '/work/ccache', # this path is inside the container as /work/ccache is mounted
+ 'CCACHE_LOGFILE': '/tmp/ccache.log', # a container-scoped log, useful for ccache verification.
})
environment.update({k: os.environ[k] for k in ['CCACHE_MAXSIZE'] if k in os.environ})
@@ -219,13 +212,9 @@ def container_run(docker_client: SafeDockerClient,
os.makedirs(local_ccache_dir, exist_ok=True)
logging.info("Using ccache directory: %s", local_ccache_dir)
- # Equivalent command
- docker_cmd_list = [
- "docker",
- 'run',
- "--gpus all" if nvidia_runtime else "",
- "--cap-add",
- "SYS_PTRACE", # Required by ASAN
+ # Build docker command
+ docker_arg_list = [
+ "--cap-add", "SYS_PTRACE", # Required by ASAN
'--rm',
'--shm-size={}'.format(shared_memory_size),
# mount mxnet root
@@ -241,40 +230,27 @@ def container_run(docker_client: SafeDockerClient,
'-e', "CCACHE_DIR={}".format(environment['CCACHE_DIR']),
# a container-scoped log, useful for ccache verification.
'-e', "CCACHE_LOGFILE={}".format(environment['CCACHE_LOGFILE']),
- '-ti',
- tag]
- docker_cmd_list.extend(command)
- docker_cmd = ' \\\n\t'.join(docker_cmd_list)
- logging.info("Running %s in container %s", command, tag)
- logging.info("Executing the equivalent of:\n%s\n", docker_cmd)
+ ]
+ docker_arg_list += [tag]
+ docker_arg_list.extend(command)
+
+ def docker_run_cmd(cmd):
+ logging.info("Running %s in container %s", command, tag)
+ logging.info("Executing command:\n%s\n", ' \\\n\t'.join(cmd))
+ subprocess.run(cmd, stdout=sys.stdout, stderr=sys.stderr, check=True)
if not dry_run:
- #############################
- #
- signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGINT, signal.SIGTERM})
- # noinspection PyShadowingNames
- runtime = None
- if nvidia_runtime:
- # noinspection PyShadowingNames
- # runc is default (docker info | grep -i runtime)
- runtime = 'nvidia'
-
- return docker_client.run(
- tag,
- runtime=runtime,
- command=command,
- shm_size=shared_memory_size,
- user='{}:{}'.format(os.getuid(), os.getgid()),
- cap_add='SYS_PTRACE',
- volumes={
- mx_root:
- {'bind': '/work/mxnet', 'mode': 'rw'},
- local_build_folder:
- {'bind': '/work/build', 'mode': 'rw'},
- local_ccache_dir:
- {'bind': '/work/ccache', 'mode': 'rw'},
- },
- environment=environment)
+ if not nvidia_runtime:
+ docker_run_cmd(['docker', 'run'] + docker_arg_list)
+ else:
+ try:
+ docker_run_cmd(['docker', 'run', '--gpus', 'all'] + docker_arg_list)
+ except subprocess.CalledProcessError as e:
+ if e.returncode == 125:
+ docker_run_cmd(['docker', 'run', '--runtime', 'nvidia'] + docker_arg_list)
+ else:
+ raise
+
return 0
@@ -378,7 +354,6 @@ def main() -> int:
args = parser.parse_args()
command = list(chain(*args.command))
- docker_client = SafeDockerClient()
environment = dict([(e.split('=')[:2] if '=' in e else (e, os.environ[e])) for e in args.environment])
@@ -405,13 +380,13 @@ def main() -> int:
ret = 0
if command:
ret = container_run(
- docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
+ platform=platform, nvidia_runtime=args.nvidiadocker,
shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
local_ccache_dir=args.ccache_dir, environment=environment)
elif args.print_docker_run:
command = []
ret = container_run(
- docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
+ platform=platform, nvidia_runtime=args.nvidiadocker,
shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
local_ccache_dir=args.ccache_dir, dry_run=True, environment=environment)
else:
@@ -419,7 +394,7 @@ def main() -> int:
command = ["/work/mxnet/ci/docker/runtime_functions.sh",
"build_{}".format(platform)]
logging.info("No command specified, trying default build: %s", '
'.join(command))
ret = container_run(
- docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
+ platform=platform, nvidia_runtime=args.nvidiadocker,
shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
local_ccache_dir=args.ccache_dir, environment=environment)
@@ -449,7 +424,7 @@ def main() -> int:
continue
command = ["/work/mxnet/ci/docker/runtime_functions.sh",
build_platform]
container_run(
- docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
+ platform=platform, nvidia_runtime=args.nvidiadocker,
shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
local_ccache_dir=args.ccache_dir, environment=environment)
shutil.move(buildir(), plat_buildir)
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index c985a18..da6a74a 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -769,7 +769,7 @@ def test_unix_python3_gpu_cu110() {
def test_unix_python3_quantize_gpu() {
return ['Python3: Quantize GPU': {
- node(NODE_LINUX_GPU_P3) {
+ node(NODE_LINUX_GPU_G4) {
ws('workspace/ut-python3-quantize-gpu') {
timeout(time: max_time, unit: 'MINUTES') {
try {
@@ -787,7 +787,7 @@ def test_unix_python3_quantize_gpu() {
def test_unix_python3_quantize_gpu_cu110() {
return ['Python3+CUDA11.0: Quantize GPU': {
- node(NODE_LINUX_GPU_P3) {
+ node(NODE_LINUX_GPU_G4) {
ws('workspace/ut-python3-quantize-gpu') {
timeout(time: max_time, unit: 'MINUTES') {
try {
@@ -903,7 +903,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu() {
def test_unix_python3_tensorrt_gpu() {
return ['Python3: TensorRT GPU': {
- node(NODE_LINUX_GPU_P3) {
+ node(NODE_LINUX_GPU_G4) {
ws('workspace/build-tensorrt') {
timeout(time: max_time, unit: 'MINUTES') {
try {
diff --git a/ci/safe_docker_run.py b/ci/safe_docker_run.py
deleted file mode 100755
index 9c90c2a..0000000
--- a/ci/safe_docker_run.py
+++ /dev/null
@@ -1,253 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Docker command wrapper to guard against Zombie containers
-"""
-
-import argparse
-import atexit
-import logging
-import os
-import random
-import signal
-import sys
-import time
-from functools import reduce
-from itertools import chain
-from typing import Dict, Any
-
-import docker
-from docker.errors import NotFound
-from docker.models.containers import Container
-
-from util import config_logging
-
-DOCKER_STOP_TIMEOUT_SECONDS = 3
-CONTAINER_WAIT_SECONDS = 600
-
-
-class SafeDockerClient:
- """
- A wrapper around the docker client to ensure that no zombie containers are left hanging around
- in case the script is not allowed to finish normally
- """
-
- @staticmethod
- def _trim_container_id(cid):
- """:return: trimmed container id"""
- return cid[:12]
-
- def __init__(self):
- self._docker_client = docker.from_env()
- self._containers = set()
- self._docker_stop_timeout = DOCKER_STOP_TIMEOUT_SECONDS
- self._container_wait_seconds = CONTAINER_WAIT_SECONDS
-
- def signal_handler(signum, _):
- signal.pthread_sigmask(signal.SIG_BLOCK, {signum})
- logging.warning("Signal %d received, cleaning up...", signum)
- self._clean_up()
- logging.warning("done. Exiting with error.")
- sys.exit(1)
-
- atexit.register(self._clean_up)
- signal.signal(signal.SIGTERM, signal_handler)
- signal.signal(signal.SIGINT, signal_handler)
-
- def _clean_up(self):
- if self._containers:
- logging.warning("Cleaning up containers")
- else:
- return
- # noinspection PyBroadException
- try:
- stop_timeout = int(os.environ.get("DOCKER_STOP_TIMEOUT", self._docker_stop_timeout))
- except Exception:
- stop_timeout = 3
- for container in self._containers:
- try:
- container.stop(timeout=stop_timeout)
- logging.info("☠: stopped container %s",
self._trim_container_id(container.id))
- container.remove()
- logging.info("🚽: removed container %s",
self._trim_container_id(container.id))
- except Exception as e:
- logging.exception(e)
- self._containers.clear()
- logging.info("Cleaning up containers finished.")
-
- def _add_container(self, container: Container) -> Container:
- self._containers.add(container)
- return container
-
- def _remove_container(self, container: Container):
- self._containers.remove(container)
-
- def run(self, *args, **kwargs) -> int:
- if "detach" in kwargs and kwargs.get("detach") is False:
- raise ValueError("Can only safe run with 'detach' set to True")
- else:
- kwargs["detach"] = True
-
- # These variables are passed to the container so the process tree killer can find runaway
- # process inside the container
- # https://wiki.jenkins.io/display/JENKINS/ProcessTreeKiller
- # https://github.com/jenkinsci/jenkins/blob/578d6bacb33a5e99f149de504c80275796f0b231/core/src/main/java/hudson/model/Run.java#L2393
- if "environment" not in kwargs:
- kwargs["environment"] = {}
-
- jenkins_env_vars = ["BUILD_NUMBER", "BUILD_ID", "BUILD_TAG"]
- kwargs["environment"].update({k: os.environ[k] for k in
jenkins_env_vars if k in os.environ})
-
- ret = 0
- try:
- # Race condition:
- # add a random sleep to (a) give docker time to flush disk buffer after pulling image
- # and (b) minimize race conditions between jenkins runs on same host
- time.sleep(random.randint(2,10))
- # If the call to docker_client.containers.run is interrupted, it is possible that
- # the container won't be cleaned up. We avoid this by temporarily masking the signals.
- signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGINT, signal.SIGTERM})
- container = self._add_container(self._docker_client.containers.run(*args, **kwargs))
- signal.pthread_sigmask(signal.SIG_UNBLOCK, {signal.SIGINT, signal.SIGTERM})
- logging.info("Started container: %s", self._trim_container_id(container.id))
- stream = container.logs(stream=True, stdout=True, stderr=True)
- sys.stdout.flush()
- for chunk in stream:
- sys.stdout.buffer.write(chunk)
- sys.stdout.buffer.flush()
- sys.stdout.flush()
- stream.close()
-
- try:
- logging.info("Waiting for status of container %s for %d s.",
- self._trim_container_id(container.id),
- self._container_wait_seconds)
- wait_result = container.wait(timeout=self._container_wait_seconds)
- logging.info("Container exit status: %s", wait_result)
- ret = wait_result.get('StatusCode', 200)
- if ret != 0:
- logging.error("Container exited with an error 😞")
- logging.info("Executed command for reproduction:\n\n%s\n",
" ".join(sys.argv))
- else:
- logging.info("Container exited with success 👍")
- logging.info("Executed command for reproduction:\n\n%s\n",
" ".join(sys.argv))
- except Exception as err:
- logging.exception(err)
- return 150
-
- try:
- logging.info("Stopping container: %s",
self._trim_container_id(container.id))
- container.stop()
- except Exception as e:
- logging.exception(e)
- ret = 151
-
- try:
- logging.info("Removing container: %s",
self._trim_container_id(container.id))
- container.remove()
- except Exception as e:
- logging.exception(e)
- ret = 152
- self._remove_container(container)
- containers = self._docker_client.containers.list()
- if containers:
- logging.info("Other running containers: %s",
[self._trim_container_id(x.id) for x in containers])
- except NotFound as e:
- logging.info("Container was stopped before cleanup started: %s", e)
-
- return ret
-
-
-def _volume_mount(volume_dfn: str) -> Dict[str, Any]:
- """
- Converts docker volume mount format, e.g. docker run --volume /local/path:/container/path:ro
- to an object understood by the python docker library, e.g. {"local/path": {"bind": "/container/path", "mode": "ro"}}
- This is used by the argparser for automatic conversion and input validation.
- If the mode is not specified, 'rw' is assumed.
- :param volume_dfn: A string to convert to a volume mount object in the format <local path>:<container path>[:ro|rw]
- :return: An object in the form {"<local path>" : {"bind": "<container path>", "mode": "rw|ro"}}
- """
- if volume_dfn is None:
- raise argparse.ArgumentTypeError("Missing value for volume definition")
-
- parts = volume_dfn.split(":")
-
- if len(parts) < 2 or len(parts) > 3:
- raise argparse.ArgumentTypeError("Invalid volume definition
{}".format(volume_dfn))
-
- mode = "rw"
- if len(parts) == 3:
- mode = parts[2]
-
- if mode not in ["rw", "ro"]:
- raise argparse.ArgumentTypeError("Invalid volume mount mode {} in
volume definition {}".format(mode, volume_dfn))
-
- return {parts[0]: {"bind": parts[1], "mode": mode}}
-
-
-def main(command_line_arguments):
- config_logging()
-
- parser = argparse.ArgumentParser(
- description="""Wrapper around docker run that protects against Zombie
containers""", epilog="")
-
- parser.add_argument("-u", "--user",
- help="Username or UID (format:
<name|uid>[:<group|gid>])",
- default=None)
-
- parser.add_argument("-v", "--volume",
- action='append',
- type=_volume_mount,
- help="Bind mount a volume",
- default=[])
-
- parser.add_argument("--cap-add",
- help="Add Linux capabilities",
- action="append",
- type=str,
- default=[])
-
- parser.add_argument("--runtime",
- help="Runtime to use for this container",
- default=None)
-
- parser.add_argument("--name",
- help="Assign a name to the container",
- default=None)
-
- parser.add_argument("image", metavar="IMAGE")
- parser.add_argument("command", metavar="COMMAND")
- parser.add_argument("args", nargs='*', metavar="ARG")
-
- args = parser.parse_args(args=command_line_arguments)
- docker_client = SafeDockerClient()
- return docker_client.run(args.image, **{
- "command": " ".join(list(chain([args.command] + args.args))),
- "user": args.user,
- "runtime": args.runtime,
- "name": args.name,
- "volumes": reduce(lambda dct, v: {**dct, **v}, args.volume, {}),
- "cap_add": args.cap_add
- })
-
-
-if __name__ == "__main__":
- exit(main(sys.argv[1:]))
diff --git a/ci/test_safe_docker_run.py b/ci/test_safe_docker_run.py
deleted file mode 100644
index 433d42e..0000000
--- a/ci/test_safe_docker_run.py
+++ /dev/null
@@ -1,427 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Safe docker run tests
-"""
-import itertools
-import os
-import signal
-import unittest
-from typing import Optional
-from unittest.mock import create_autospec, patch, call
-
-from docker import DockerClient
-from docker.models.containers import Container, ContainerCollection
-
-from safe_docker_run import SafeDockerClient, main
-
-
-def create_mock_container(status_code: int = 0):
- """
- Creates a mock docker container that exits with the specified status code
- """
- mock_container = create_autospec(Container, name="mock_container")
- mock_container.wait.return_value = {
- "StatusCode": status_code
- }
- return mock_container
-
-
-def create_mock_container_collection(container: Container):
- """
- Creates a mock ContainerCollection that return the supplied container when the 'run' method is called
- """
- mock_container_collection = create_autospec(ContainerCollection, name="mock_collection")
- mock_container_collection.run.return_value = container
- return mock_container_collection
-
-
-class MockDockerClient:
- """
- A mock DockerClient when docker.from_env is called
- The supplied container will be returned when the client.containers.run method is called
- """
- def __init__(self, container: Container):
- self._mock_client = create_autospec(DockerClient, name="mock_client")
- self._mock_client.containers = create_mock_container_collection(container)
- self._patch = patch("docker.from_env", return_value=self._mock_client)
-
- def __enter__(self):
- self._patch.start()
- return self._mock_client
-
- def __exit__(self, _, __, ___):
- self._patch.stop()
-
-
-class TestSafeDockerRun(unittest.TestCase):
-
- @patch("safe_docker_run.signal.pthread_sigmask")
- @patch.dict(os.environ, {
- "BUILD_NUMBER": "BUILD_NUMBER_5",
- "BUILD_ID": "BUILD_ID_1",
- "BUILD_TAG": "BUILD_TAG_7"
- })
- def test_run_successful(self, mock_pthread_sigmask):
- """
- Tests successful run
- """
- mock_container = create_mock_container()
-
- with MockDockerClient(mock_container) as mock_client:
- safe_docker = SafeDockerClient()
-
- # Check return code is 0
- assert safe_docker.run("image", "command") == 0
-
- # Check call to container is correct
- assert mock_client.containers.run.call_args_list == [
- call("image", "command", detach=True, environment={
- "BUILD_NUMBER": "BUILD_NUMBER_5",
- "BUILD_ID": "BUILD_ID_1",
- "BUILD_TAG": "BUILD_TAG_7"
- })
- ]
-
- # Check correct signals are blocked then unblocked
- assert mock_pthread_sigmask.call_args_list == [
- call(signal.SIG_BLOCK, {signal.SIGINT, signal.SIGTERM}),
- call(signal.SIG_UNBLOCK, {signal.SIGINT, signal.SIGTERM})
- ]
-
- # Assert container is stopped and removed
- assert mock_container.stop.call_count == 1
- assert mock_container.remove.call_count == 1
- assert len(safe_docker._containers) == 0
-
- def test_run_detach(self):
- """
- Tests detach=True is passed to the underlying call by default
- """
- mock_container = create_mock_container()
-
- # Test detach=True is passed in even if not specified
- with MockDockerClient(mock_container) as mock_client:
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command") == 0
- assert mock_client.containers.run.call_count == 1
- _, kwargs = mock_client.containers.run.call_args
- assert kwargs["detach"] is True
-
- # Test passing in detach=True does not cause any issues
- with MockDockerClient(mock_container) as mock_client:
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command", detach=True) == 0
- assert mock_client.containers.run.call_count == 1
- _, kwargs = mock_client.containers.run.call_args
- assert kwargs["detach"] is True
-
- # Test detach=False fails
- with MockDockerClient(mock_container) as mock_client:
- safe_docker = SafeDockerClient()
- with self.assertRaises(ValueError):
- safe_docker.run("image", "command", detach=False)
- assert mock_client.containers.run.call_args_list == []
-
- def test_jenkins_vars(self):
- """
- Tests jenkins environment variables are appropriately passed to the underlying docker run call
- """
- # NOTE: It's important that these variables are passed to the underlying docker container
- # These variables are passed to the container so the process tree killer can find runaway
- # process inside the container
- # https://wiki.jenkins.io/display/JENKINS/ProcessTreeKiller
- # https://github.com/jenkinsci/jenkins/blob/578d6bacb33a5e99f149de504c80275796f0b231/core/src/main/java/hudson/model/Run.java#L2393
-
- jenkins_vars = {
- "BUILD_NUMBER": "BUILD_NUMBER_5",
- "BUILD_ID": "BUILD_ID_1",
- "BUILD_TAG": "BUILD_TAG_7"
- }
- mock_container = create_mock_container()
-
- # Test environment is empty if the jenkins vars are not present
- with MockDockerClient(mock_container) as mock_client:
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command") == 0
- assert mock_client.containers.run.call_count == 1
- _, kwargs = mock_client.containers.run.call_args
- assert kwargs["environment"] == {}
-
- # Test environment contains jenkins env vars if they are present
- with MockDockerClient(mock_container) as mock_client:
- with patch.dict(os.environ, jenkins_vars):
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command") == 0
- assert mock_client.containers.run.call_count == 1
- _, kwargs = mock_client.containers.run.call_args
- assert kwargs["environment"] == jenkins_vars
-
- # Test jenkins env vars are added to callers env vars
- user_env = {"key1": "value1", "key2": "value2"}
- with MockDockerClient(mock_container) as mock_client:
- with patch.dict(os.environ, jenkins_vars):
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command",
environment=user_env) == 0
- assert mock_client.containers.run.call_count == 1
- _, kwargs = mock_client.containers.run.call_args
- assert kwargs["environment"] == {**jenkins_vars, **user_env}
-
- def test_run_args_kwargs_passed(self):
- """
- Tests args and kwargs are passed to the container run call
- """
- mock_container = create_mock_container()
-
- # Test detach=True is passed in even if not specified
- with MockDockerClient(mock_container) as mock_client:
- safe_docker = SafeDockerClient()
- assert safe_docker.run(
- "image",
- "command",
- "another_arg",
- str_param="value",
- bool_param=True,
- none_param=None,
- int_param=5,
- float_param=5.2,
- list_param=["this", "is", "a", "list"],
- map_param={
- "a": "5",
- "b": True,
- "c": 2
- }) == 0
- assert mock_client.containers.run.call_args_list == [
- call(
- "image",
- "command",
- "another_arg",
- detach=True,
- environment={},
- str_param="value",
- bool_param=True,
- none_param=None,
- int_param=5,
- float_param=5.2,
- list_param=["this", "is", "a", "list"],
- map_param={
- "a": "5",
- "b": True,
- "c": 2
- }
- )
- ]
-
- def test_container_returns_non_zero_status_code(self):
- """
- Tests non-zero code from container is returned and the container
- is cleaned up
- """
- mock_container = create_mock_container(status_code=10)
- with MockDockerClient(mock_container):
- safe_docker = SafeDockerClient()
- # check return code and that container gets cleaned up
- assert safe_docker.run("image", "command") == 10
- assert mock_container.stop.call_count == 1
- assert mock_container.remove.call_count == 1
- assert len(safe_docker._containers) == 0
-
- def test_container_wait_raises_returns_150(self):
- """
- Tests 150 is returned if an error is raised when calling container.wait
- """
- mock_container = create_mock_container()
- mock_container.wait.side_effect = RuntimeError("Something bad happened")
- with MockDockerClient(mock_container):
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command") == 150
-
- def test_container_stop_raises_returns_151(self):
- """
- Tests 151 is returned if an error is raised when calling container.stop
- """
- mock_container = create_mock_container()
- mock_container.stop.side_effect = RuntimeError("Something bad happened")
- with MockDockerClient(mock_container):
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command") == 151
-
- def test_container_remove_raises_returns_152(self):
- """
- Tests 152 is returned if an error is raised when calling container.remove
- """
- mock_container = create_mock_container()
- mock_container.remove.side_effect = RuntimeError("Something bad happened")
- with MockDockerClient(mock_container):
- safe_docker = SafeDockerClient()
- assert safe_docker.run("image", "command") == 152
-
- def test_main(self):
- """
- Tests main function against different command line arguments
- """
- tests = [
- # ( supplied command line arguments, expected call )
- (
- ["image", "command"],
- call("image", command="command", runtime=None, user=None,
name=None, volumes={}, cap_add=[])
- ),
- (
- ["image", "command", "arg1", "arg2"],
- call("image", command="command arg1 arg2", runtime=None,
user=None, name=None, volumes={}, cap_add=[])
- ),
- (
- ["--runtime", "nvidia", "image", "command"],
- call("image", command="command", runtime="nvidia", user=None,
name=None, volumes={}, cap_add=[])
- ),
- (
- ["--user", "1001:1001", "image", "command"],
- call("image", command="command", runtime=None,
user="1001:1001", name=None, volumes={}, cap_add=[])
- ),
- ([
- "--volume", "/local/path1:/container/path1",
- "--volume", "/local/path2:/container/path2:ro",
- "image",
- "command"
- ], call("image", command="command", runtime=None, user=None,
name=None, volumes={
- "/local/path1": {
- "bind": "/container/path1",
- "mode": "rw"
- },
- "/local/path2": {
- "bind": "/container/path2",
- "mode": "ro"
- }
- }, cap_add=[])),
- ([
- "--runtime", "nvidia",
- "-u", "1001:1001",
- "-v", "/local/path1:/container/path1",
- "-v", "/local/path2:/container/path2:ro",
- "--cap-add", "bob",
- "--cap-add", "jimmy",
- "--name",
- "container_name",
- "image",
- "command",
- "arg1",
- "arg2"
- ], call(
- "image",
- command="command arg1 arg2",
- runtime="nvidia",
- user="1001:1001",
- name="container_name",
- volumes={
- "/local/path1": {
- "bind": "/container/path1",
- "mode": "rw"
- },
- "/local/path2": {
- "bind": "/container/path2",
- "mode": "ro"
- }
- }, cap_add=["bob", "jimmy"])
- )
- ]
-
- # Tests valid arguments
- mock_docker = create_autospec(SafeDockerClient)
- mock_docker.run.return_value = 0
- with patch("safe_docker_run.SafeDockerClient",
return_value=mock_docker):
- for test in tests:
- arguments, expected_call = test
- main(arguments)
- assert mock_docker.run.call_args == expected_call
-
- # Tests invalid arguments
- tests = [
- [],
- None,
- ["image"],
- # Test some bad volume mounts
- ["-v", "bob", "image", "args"],
- ["-v", "/local/path", "image", "args"],
- ["-v", "/local/path:/container/path:blah", "image", "args"],
- ["-v", "", "image", "args"],
- ["-v", "a:b:c:d", "image", "args"]
- ]
-
- mock_docker = create_autospec(SafeDockerClient)
- with patch("safe_docker_run.SafeDockerClient",
return_value=mock_docker):
- with self.assertRaises(SystemExit):
- for test in tests:
- main(test)
-
- def test_clean_up(self):
- """
- Tests container clean up in case of SIGTERM and SIGINT
- """
- import subprocess
- import time
- import docker.errors
-
- docker_client = docker.from_env()
- container_name = "safedockertestcontainer1234"
-
- def get_container(name: str) -> Optional[Container]:
- try:
- return docker_client.containers.get(name)
- except docker.errors.NotFound:
- return None
-
- def remove_container_if_exists(name: str):
- container = get_container(name)
- if container:
- container.stop()
- container.remove()
-
- def wait_for_container(name: str) -> bool:
- for _ in itertools.count(5):
- if get_container(name):
- return True
- time.sleep(1)
- return False
-
- # Clear any containers with container name
- remove_container_if_exists(container_name)
-
- # None => not signal is emitted - we should still finish with no containers at the end due
- # to the atexit
- for sig in [None, signal.SIGTERM, signal.SIGINT]:
- # Execute the safe docker run script in a different process
- proc = subprocess.Popen(['./safe_docker_run.py', "--name", container_name, "ubuntu:18.04", "sleep 10"])
- # NOTE: we need to wait for the container to come up as not all operating systems support blocking signals
- if wait_for_container(container_name) is False:
- raise RuntimeError("Test container did not come up")
-
- # Issue the signal and wait for the process to finish
- if sig:
- proc.send_signal(sig)
- proc.wait()
-
- # The container should no longer exist
- assert get_container(container_name) is None
-
-
-if __name__ == '__main__':
- import nose
- nose.main()