Repository: aurora Updated Branches: refs/heads/master b099e2f0f -> d56f8c644
Tear down the observer in case of unhandled errors I was not able to manually trigger the root cause of AURORA-1801 by altering the Mesos filesystem layout. I have therefore adopted the general teardown idea. Example output (using a hardcoded throw): ``` Bottle v0.11.6 server starting up (using CherryPyServer())... Listening on http://192.168.33.7:1338/ Hit Ctrl-C to quit. E1106 23:03:36.722500 8699 exceptional.py:41] Unhandled error in thread Thread-1 [TID=8705]. Tearing down. Traceback (most recent call last): File "apache/thermos/common/exceptional.py", line 37, in _excepting_run self.__real_run(*args, **kw) File "apache/thermos/observer/task_observer.py", line 135, in run self._detector.refresh() File "apache/thermos/observer/detector.py", line 74, in refresh self._refresh_detectors() File "apache/thermos/observer/detector.py", line 58, in _refresh_detectors new_paths = set(self._path_detector.get_paths()) File "apache/aurora/executor/common/path_detector.py", line 35, in get_paths return list(set(path for path in iterate() if os.path.exists(path))) File "apache/aurora/executor/common/path_detector.py", line 35, in <genexpr> return list(set(path for path in iterate() if os.path.exists(path))) File "apache/aurora/executor/common/path_detector.py", line 34, in iterate raise RuntimeError("Fail on purpose...") RuntimeError: Fail on purpose... I1106 23:03:42.513900 8728 static_assets.py:34] detecting assets... I1106 23:03:42.541809 8728 static_assets.py:38] detected asset: observer.js I1106 23:03:42.542799 8728 static_assets.py:38] detected asset: bootstrap.css I1106 23:03:42.543728 8728 static_assets.py:38] detected asset: jquery.pailer.js I1106 23:03:42.544576 8728 static_assets.py:38] detected asset: jquery.js I1106 23:03:42.548482 8728 static_assets.py:38] detected asset: favicon.ico Bottle v0.11.6 server starting up (using CherryPyServer())... Listening on http://192.168.33.7:1338/ Hit Ctrl-C to quit. 
``` Bugs closed: AURORA-1801 Reviewed at https://reviews.apache.org/r/53519/ Project: http://git-wip-us.apache.org/repos/asf/aurora/repo Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/d56f8c64 Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/d56f8c64 Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/d56f8c64 Branch: refs/heads/master Commit: d56f8c64466a94a990db3308a3130d3fce0584af Parents: b099e2f Author: Stephan Erb <[email protected]> Authored: Thu Nov 24 16:37:51 2016 +0100 Committer: Stephan Erb <[email protected]> Committed: Thu Nov 24 16:37:51 2016 +0100 ---------------------------------------------------------------------- .../apache/aurora/tools/thermos_observer.py | 24 ++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/aurora/blob/d56f8c64/src/main/python/apache/aurora/tools/thermos_observer.py ---------------------------------------------------------------------- diff --git a/src/main/python/apache/aurora/tools/thermos_observer.py b/src/main/python/apache/aurora/tools/thermos_observer.py index 0a07df7..4bba019 100644 --- a/src/main/python/apache/aurora/tools/thermos_observer.py +++ b/src/main/python/apache/aurora/tools/thermos_observer.py @@ -14,9 +14,12 @@ """A Mesos-customized entry point to the thermos_observer webserver.""" +import sys +import thread +import threading import time -from twitter.common import app +from twitter.common import app, log from twitter.common.exceptions import ExceptionalThread from twitter.common.log.options import LogOptions from twitter.common.quantity import Amount, Time @@ -69,17 +72,30 @@ def initialize(options): return TaskObserver(path_detector, interval=polling_interval) +def handle_error(exc_type, value, traceback): + """ Tear down the observer in case of unhandled errors. 
+ + By using ExceptionalThread throughout the observer we have ensured that sys.excepthook will + be called for every unhandled exception, even for those not originating in the main thread. + """ + log.error("An unhandled error occured. Tearing down.", exc_info=(exc_type, value, traceback)) + # TODO: In Python 3.4 we will be able to use threading.main_thread() + if not isinstance(threading.current_thread(), threading._MainThread): + thread.interrupt_main() + + def main(_, options): observer = initialize(options) observer.start() root_server = configure_server(observer) - thread = ExceptionalThread(target=lambda: root_server.run(options.ip, options.port, 'cherrypy')) - thread.daemon = True - thread.start() + server = ExceptionalThread(target=lambda: root_server.run(options.ip, options.port, 'cherrypy')) + server.daemon = True + server.start() sleep_forever() +sys.excepthook = handle_error LogOptions.set_stderr_log_level('google:INFO') app.main()
