KellenSunderland commented on a change in pull request #12276: [Don't
merge][Review] A solution to prevent zombie containers locally and in CI
URL: https://github.com/apache/incubator-mxnet/pull/12276#discussion_r211862717
##########
File path: ci/build.py
##########
@@ -158,63 +246,142 @@ def default_ccache_dir() -> str:
return ccache_dir
return os.path.join(tempfile.gettempdir(), "ci_ccache")
+def trim_container_id(cid):
+ return cid[:12]
def container_run(platform: str,
- docker_binary: str,
+ nvidia_runtime: bool,
docker_registry: str,
shared_memory_size: str,
- local_ccache_dir: str,
command: List[str],
+ local_ccache_dir: str,
+ cleanup: Cleanup,
dry_run: bool = False,
- interactive: bool = False) -> str:
+ interactive: bool = False) -> int:
+ CONTAINER_WAIT_S = 600
+ #
+ # Environment setup
+ #
+ environment = {
+ 'CCACHE_MAXSIZE': '500G',
+ 'CCACHE_TEMPDIR': '/tmp/ccache', # temp dir should be local and not
shared
+ 'CCACHE_DIR': '/work/ccache', # this path is inside the container as
/work/ccache is mounted
+ 'CCACHE_LOGFILE': '/tmp/ccache.log', # a container-scoped log, useful
for ccache verification.
+ }
+ # These variables are passed to the container to the process tree killer
can find runaway process inside the container
+ # https://wiki.jenkins.io/display/JENKINS/ProcessTreeKiller
+ #
https://github.com/jenkinsci/jenkins/blob/578d6bacb33a5e99f149de504c80275796f0b231/core/src/main/java/hudson/model/Run.java#L2393
+ #
+ JENKINS_ENV_VARS = ['BUILD_NUMBER', 'BUILD_ID', 'BUILD_TAG']
+ environment.update({k: os.environ[k] for k in JENKINS_ENV_VARS if k in
os.environ})
+ environment.update({k: os.environ[k] for k in ['CCACHE_MAXSIZE'] if k in
os.environ})
+
tag = get_docker_tag(platform=platform, registry=docker_registry)
mx_root = get_mxnet_root()
local_build_folder = buildir()
# We need to create it first, otherwise it will be created by the docker
daemon with root only permissions
os.makedirs(local_build_folder, exist_ok=True)
os.makedirs(local_ccache_dir, exist_ok=True)
logging.info("Using ccache directory: %s", local_ccache_dir)
- runlist = [docker_binary, 'run', '--rm', '-t',
+ docker_client = docker.from_env()
+ # Equivalent command
+ docker_cmd_list = [get_docker_binary(nvidia_runtime), 'run',
+ '--rm',
'--shm-size={}'.format(shared_memory_size),
'-v', "{}:/work/mxnet".format(mx_root), # mount mxnet root
'-v', "{}:/work/build".format(local_build_folder), # mount
mxnet/build for storing build artifacts
'-v', "{}:/work/ccache".format(local_ccache_dir),
'-u', '{}:{}'.format(os.getuid(), os.getgid()),
- '-e', 'CCACHE_MAXSIZE={}'.format(CCACHE_MAXSIZE),
'-e', 'CCACHE_TEMPDIR=/tmp/ccache', # temp dir should be local
and not shared
'-e', "CCACHE_DIR=/work/ccache", # this path is inside the
container as /work/ccache is mounted
'-e', "CCACHE_LOGFILE=/tmp/ccache.log", # a container-scoped
log, useful for ccache verification.
+ '-ti',
tag]
- runlist.extend(command)
- cmd = '\\\n\t'.join(runlist)
- ret = 0
- if not dry_run and not interactive:
- logging.info("Running %s in container %s", command, tag)
- logging.info("Executing:\n%s\n", cmd)
- ret = call(runlist)
-
- docker_run_cmd = ' '.join(runlist)
- if not dry_run and interactive:
- into_cmd = deepcopy(runlist)
- # -ti can't be after the tag, as is interpreted as a command so hook
it up after the -u argument
- idx = into_cmd.index('-u') + 2
- into_cmd[idx:idx] = ['-ti']
- cmd = '\\\n\t'.join(into_cmd)
- logging.info("Executing:\n%s\n", cmd)
- docker_run_cmd = ' '.join(into_cmd)
- ret = call(into_cmd)
-
- if not dry_run and not interactive and ret != 0:
- logging.error("Running of command in container failed (%s):\n%s\n",
ret, cmd)
- logging.error("You can get into the container by adding the -i option")
- raise subprocess.CalledProcessError(ret, cmd)
-
- return docker_run_cmd
+ docker_cmd_list.extend(command)
+ docker_cmd = ' \\\n\t'.join(docker_cmd_list)
Review comment:
Would you be open to not putting these on different lines with \ ? I've had
to copy and paste these, and each time I have to paste the command into a text
editor and remove the \ and newlines.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services