KellenSunderland commented on a change in pull request #12276: [Don't merge][Review] A solution to prevent zombie containers locally and in CI
URL: https://github.com/apache/incubator-mxnet/pull/12276#discussion_r211865036
##########
File path: ci/build.py
##########
@@ -37,17 +37,118 @@
import platform
from copy import deepcopy
from itertools import chain
-from subprocess import call, check_call
+from subprocess import call, check_call, check_output
from typing import *
from util import *
+import docker
+import docker.models
+import docker.errors
+import signal
+import atexit
+import pprint
+import requests
+
+
+class Cleanup:
+ """A class to cleanup containers"""
+ def __init__(self):
+ self.containers = set()
+ self.docker_stop_timeout = 3
+
+ def add_container(self, container: docker.models.containers.Container):
+ assert isinstance(container, docker.models.containers.Container)
+ self.containers.add(container)
+
+ def remove_container(self, container: docker.models.containers.Container):
+ assert isinstance(container, docker.models.containers.Container)
+ self.containers.remove(container)
+
+ def _cleanup_containers(self):
+ if self.containers:
+ logging.warning("Cleaning up containers")
+ else:
+ return
+ docker_client = docker.from_env()
+ try:
+ stop_timeout = int(os.environ.get("DOCKER_STOP_TIMEOUT", self.docker_stop_timeout))
+ except Exception as e:
+ stop_timeout = 3
+ for container in self.containers:
+ try:
+ container.stop(timeout=stop_timeout)
+ logging.info("☠: stopped container %s", trim_container_id(container.id))
+ container.remove()
+ logging.info("🚽: removed container %s", trim_container_id(container.id))
+ except Exception as e:
+ logging.exception(e)
+ #pass
+ self.containers.clear()
+ logging.info("Cleaning up containers finished.")
+
+ def __call__(self):
+ """Perform cleanup"""
+ self._cleanup_containers()
+
+
+
+def retry(ExceptionToCheck, tries=4, delay_s=1, backoff=2):
+ """Retry calling the decorated function using an exponential backoff.
+
+ http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
+ original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry
+
+ :param ExceptionToCheck: the exception to check. may be a tuple of
+ exceptions to check
+ :type ExceptionToCheck: Exception or tuple
+ :param tries: number of times to try (not retry) before giving up
+ :type tries: int
+ :param delay_s: initial delay between retries in seconds
+ :type delay_s: int
+ :param backoff: backoff multiplier e.g. value of 2 will double the delay
+ each retry
+ :type backoff: int
+ """
+ import time
+ from functools import wraps
+ def decorated_retry(f):
+ @wraps(f)
+ def f_retry(*args, **kwargs):
+ mtries, mdelay = tries, delay_s
+ while mtries > 1:
+ try:
+ return f(*args, **kwargs)
+ except ExceptionToCheck as e:
+ logging.warning("Exception: %s, Retrying in %d seconds...", str(e), mdelay)
+ time.sleep(mdelay)
+ mtries -= 1
+ mdelay *= backoff
+ return f(*args, **kwargs)
+
+ return f_retry # true decorator
+
+ return decorated_retry
-CCACHE_MAXSIZE = '500G'
def under_ci() -> bool:
""":return: True if we run in Jenkins."""
return 'JOB_NAME' in os.environ
-def get_platforms(path: Optional[str] = "docker"):
+
+def git_cleanup() -> None:
+ """Clean repo and subrepos, update subrepos"""
+ logging.info("cleaning up repository")
+ with remember_cwd():
+ os.chdir(get_mxnet_root())
+ check_call(['git', 'clean', '-ffdx'])
+ check_call(['git', 'submodule', 'foreach', '--recursive', 'git', 'clean', '-ffdx'])
+ check_call(['git', 'submodule', 'update', '--recursive', '--init'])
+
+
+def get_dockerfiles_path():
+ return "docker"
+
+
+def get_platforms(path: Optional[str] = get_dockerfiles_path()):
Review comment:
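Return value: please state what `get_platforms` returns (e.g. add a return type annotation, as the other functions in this change have).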
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services