[2/2] incubator-airflow git commit: Merge pull request #2136 from gwax/update-gitignore
Merge pull request #2136 from gwax/update-gitignore Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/8e2003e3 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/8e2003e3 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/8e2003e3 Branch: refs/heads/master Commit: 8e2003e37a105be214266bb24d1104c32cc72816 Parents: e423981 3d3c148 Author: Arthur Wiedmer Authored: Thu Mar 9 11:52:10 2017 -0800 Committer: Arthur Wiedmer Committed: Thu Mar 9 11:52:10 2017 -0800 -- .gitignore | 144 ++-- 1 file changed, 119 insertions(+), 25 deletions(-) --
[1/2] incubator-airflow git commit: AIRFLOW-959 Cleanup and reorganize .gitignore
Repository: incubator-airflow Updated Branches: refs/heads/master e42398100 -> 8e2003e37 AIRFLOW-959 Cleanup and reorganize .gitignore Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/3d3c1485 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/3d3c1485 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/3d3c1485 Branch: refs/heads/master Commit: 3d3c1485a7cc480f58609f97e287a1e263638dbe Parents: e423981 Author: George Leslie-Waksman Authored: Thu Mar 9 10:49:02 2017 -0800 Committer: George Leslie-Waksman Committed: Thu Mar 9 11:37:49 2017 -0800 -- .gitignore | 144 ++-- 1 file changed, 119 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/3d3c1485/.gitignore -- diff --git a/.gitignore b/.gitignore index 694a561..f0e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,32 +1,126 @@ -.idea/* -*.bkp -*.egg-info -*.pyc -.DS_Store -.ipynb* -.coverage -.python-version -airflow/git_version -airflow/www/static/coverage/ -airflow.db +# Airflow configuration airflow.cfg +unittests.cfg airflow_login.py -build -cover dbinit.py -docs/_* -dist -env initdb.py -logs -MANIFEST secrets.py -sftp-config.json -unittests.cfg -error.log + +# Airflow sqlite databases +airflow.db unittests.db -rat-results.txt -/.eggs/ -/.tox/ -venv + +# Airflow temporary artifacts +airflow/git_version +airflow/www/static/coverage/ +logs/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject + +# PyCharm +.idea/ + +# vim *.swp + +# OSX +.DS_Store + +# SQL Server backups +*.bkp + +# Spark +rat-results.txt
[1/2] incubator-airflow git commit: AIRFLOW-960 Add .editorconfig file
Repository: incubator-airflow Updated Branches: refs/heads/master 8e2003e37 -> 2a6136202 AIRFLOW-960 Add .editorconfig file Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/f5caccac Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/f5caccac Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/f5caccac Branch: refs/heads/master Commit: f5caccacd01bfc51d2b1d268886e5ab0b29497be Parents: e423981 Author: George Leslie-Waksman Authored: Thu Mar 9 11:23:06 2017 -0800 Committer: George Leslie-Waksman Committed: Thu Mar 9 11:36:41 2017 -0800 -- .editorconfig | 32 1 file changed, 32 insertions(+) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/f5caccac/.editorconfig -- diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000..a80bd65 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,32 @@ +root = true + +[*] +end_of_line = lf +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true +charset = utf-8 + +[*.py] +indent_size = 4 + +[*.sh] +indent_size = 4 + +[*.sql] +indent_size = 4 + +[*.js] +indent_size = 2 + +[*.css] +indent_size = 2 + +[*.{md,rst}] +indent_size = 2 + +[*.{yml,yaml}] +indent_size = 2 + +[*.{htm,html}] +indent_size = 2
[2/2] incubator-airflow git commit: Merge pull request #2137 from gwax/editorconfig
Merge pull request #2137 from gwax/editorconfig Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/2a613620 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/2a613620 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/2a613620 Branch: refs/heads/master Commit: 2a6136202d3ab4a92dda892d100c2820c0b45702 Parents: 8e2003e f5cacca Author: Arthur Wiedmer Authored: Thu Mar 9 11:53:57 2017 -0800 Committer: Arthur Wiedmer Committed: Thu Mar 9 11:53:57 2017 -0800 -- .editorconfig | 32 1 file changed, 32 insertions(+) --
[2/2] incubator-airflow git commit: Merge pull request #2150 from artwr/artwr-fix_another_use_of_eval
Merge pull request #2150 from artwr/artwr-fix_another_use_of_eval Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/c44e2009 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/c44e2009 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/c44e2009 Branch: refs/heads/master Commit: c44e2009ee625ce4a82c50e585a3c8617d9b4ff8 Parents: ed03bb7 2bf52ab Author: Arthur Wiedmer Authored: Tue Mar 14 11:39:45 2017 -0700 Committer: Arthur Wiedmer Committed: Tue Mar 14 11:39:45 2017 -0700 -- airflow/www/views.py | 8 1 file changed, 4 insertions(+), 4 deletions(-) --
[1/2] incubator-airflow git commit: [AIRFLOW-933] Replace eval with literal_eval to prevent RCE
Repository: incubator-airflow Updated Branches: refs/heads/master ed03bb719 -> c44e2009e [AIRFLOW-933] Replace eval with literal_eval to prevent RCE Replace eval with a literal eval to help prevent arbitrary code execution on the webserver host. Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/2bf52ab1 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/2bf52ab1 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/2bf52ab1 Branch: refs/heads/master Commit: 2bf52ab16960f00cb9a98ba455d5851aabf6305f Parents: ed03bb7 Author: Arthur Wiedmer Authored: Tue Mar 14 10:40:23 2017 -0700 Committer: Arthur Wiedmer Committed: Tue Mar 14 10:40:23 2017 -0700 -- airflow/www/views.py | 8 1 file changed, 4 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/2bf52ab1/airflow/www/views.py -- diff --git a/airflow/www/views.py b/airflow/www/views.py index de33843..15735b4 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -15,6 +15,7 @@ from past.builtins import basestring, unicode +import ast import os import pkg_resources import socket @@ -44,7 +45,6 @@ from flask._compat import PY2 import jinja2 import markdown import nvd3 -import ast from wtforms import ( Form, SelectField, TextAreaField, PasswordField, StringField, validators) @@ -231,8 +231,8 @@ def data_profiling_required(f): @wraps(f) def decorated_function(*args, **kwargs): if ( -current_app.config['LOGIN_DISABLED'] or -(not current_user.is_anonymous() and current_user.data_profiling()) +current_app.config['LOGIN_DISABLED'] or +(not current_user.is_anonymous() and current_user.data_profiling()) ): return f(*args, **kwargs) else: @@ -312,7 +312,7 @@ class Airflow(BaseView): # Processing templated fields try: -args = eval(chart.default_params) +args = ast.literal_eval(chart.default_params) if type(args) is not type(dict()): raise AirflowException('Not a 
dict') except:
[2/2] incubator-airflow git commit: Merge pull request #2162 from artwr/artwr-update_setup_cfg
Merge pull request #2162 from artwr/artwr-update_setup_cfg Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/e08b102c Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/e08b102c Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/e08b102c Branch: refs/heads/master Commit: e08b102c002eb0f8f08229063738f9a94a186278 Parents: ca16233 75cd460 Author: Arthur Wiedmer Authored: Thu Mar 16 14:38:23 2017 -0700 Committer: Arthur Wiedmer Committed: Thu Mar 16 14:38:23 2017 -0700 -- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --
[1/2] incubator-airflow git commit: [AIRFLOW-997] Update setup.cfg to point to Apache
Repository: incubator-airflow Updated Branches: refs/heads/master ca1623386 -> e08b102c0 [AIRFLOW-997] Update setup.cfg to point to Apache The setup.cfg should point to the Apache PMC as "author" and the dev mailing list as contact email. Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/75cd460c Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/75cd460c Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/75cd460c Branch: refs/heads/master Commit: 75cd460ce8e557c44c0c79245105fd535b037210 Parents: 3d6095f Author: Arthur Wiedmer Authored: Thu Mar 16 13:35:05 2017 -0700 Committer: Arthur Wiedmer Committed: Thu Mar 16 13:35:13 2017 -0700 -- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/75cd460c/setup.cfg -- diff --git a/setup.cfg b/setup.cfg index 0dd2c39..76ddf37 100644 --- a/setup.cfg +++ b/setup.cfg @@ -15,8 +15,8 @@ name = Airflow summary = Airflow is a system to programmatically author, schedule and monitor data pipelines. description-file = README.md -author = Maxime Beauchemin -author-email = maximebeauche...@gmail.com +author = Apache Airflow PMC +author-email = d...@airflow.incubator.apache.org license = Apache License, Version 2.0 [files]
incubator-airflow git commit: [AIRFLOW-999] Add support for Redis database
Repository: incubator-airflow Updated Branches: refs/heads/master 23a16f7ad -> 8de850162 [AIRFLOW-999] Add support for Redis database This PR includes a redis_hook and a redis_key_sensor to enable checking for key existence in redis. It also updates the documentation and add the relevant unit tests. - [x] Opened a PR on Github - [x] My PR addresses the following Airflow JIRA issues: - https://issues.apache.org/jira/browse/AIRFLOW-999 - [x] The PR title references the JIRA issues. For example, "[AIRFLOW-1] My Airflow PR" - [x] My PR adds unit tests - [ ] __OR__ my PR does not need testing for this extremely good reason: - [x] Here are some details about my PR: - [ ] Here are screenshots of any UI changes, if appropriate: - [x] Each commit subject references a JIRA issue. For example, "[AIRFLOW-1] Add new feature" - [x] Multiple commits addressing the same JIRA issue have been squashed - [x] My commits follow the guidelines from "[How to write a good git commit message](http://chris.beams.io/posts/git- commit/)": 1. Subject is separated from body by a blank line 2. Subject is limited to 50 characters 3. Subject does not end with a period 4. Subject uses the imperative mood ("add", not "adding") 5. Body wraps at 72 characters 6. 
Body explains "what" and "why", not "how" Closes #2165 from msempere/AIRFLOW-999/support- for-redis-database Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/8de85016 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/8de85016 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/8de85016 Branch: refs/heads/master Commit: 8de85016265443987a0e0fff406e996d421dc9d6 Parents: 23a16f7 Author: MSempere Authored: Mon Mar 20 11:10:55 2017 -0700 Committer: Arthur Wiedmer Committed: Mon Mar 20 11:11:31 2017 -0700 -- airflow/contrib/hooks/redis_hook.py | 92 airflow/contrib/sensors/redis_key_sensor.py | 46 airflow/models.py | 4 ++ airflow/utils/db.py | 5 ++ docs/installation.rst | 2 + setup.py| 2 + tests/contrib/hooks/test_redis_hook.py | 46 tests/contrib/sensors/redis_sensor.py | 64 + 8 files changed, 261 insertions(+) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/8de85016/airflow/contrib/hooks/redis_hook.py -- diff --git a/airflow/contrib/hooks/redis_hook.py b/airflow/contrib/hooks/redis_hook.py new file mode 100644 index 000..936eff8 --- /dev/null +++ b/airflow/contrib/hooks/redis_hook.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +RedisHook module +""" + +import logging + +from redis import StrictRedis + +from airflow.exceptions import AirflowException +from airflow.hooks.base_hook import BaseHook + + +class RedisHook(BaseHook): +""" +Hook to interact with Redis database +""" +def __init__(self, redis_conn_id='redis_default'): +""" +Prepares hook to connect to a Redis database. + +:param conn_id: the name of the connection that has the parameters +we need to connect to Redis. +""" +self.redis_conn_id = redis_conn_id +self.client = None +conn = self.get_connection(self.redis_conn_id) +self.host = conn.host +self.port = int(conn.port) +self.password = conn.password +self.db = int(conn.extra_dejson.get('db', 0)) +self.logger = logging.getLogger(__name__) +self.logger.debug( +'''Connection "{conn}": +\thost: {host} +\tport: {port} +\textra: {extra} +'''.format( +conn=self.redis_conn_id, +host=self.host, +port=self.port, +extra=conn.ex
incubator-airflow git commit: [AIRFLOW-1007] Use Jinja sandbox for chart_data endpoint
Repository: incubator-airflow Updated Branches: refs/heads/master b55f41f2c -> daa281c03 [AIRFLOW-1007] Use Jinja sandbox for chart_data endpoint Right now, users can put in arbitrary strings into the chart_data endpoint, and execute arbitrary code using the chart_data endpoint. By using literal_eval and ImmutableSandboxedEnvironment, we can reduce RCE. Right now, users can put in arbitrary strings into the chart_data endpoint, and execute arbitrary code using the chart_data endpoint. By using literal_eval and ImmutableSandboxedEnvironment, we can prevent RCE. Dear Airflow maintainers, Please accept this PR. I understand that it will not be reviewed until I have checked off all the steps below! ### JIRA - [x] My PR addresses the following [Airflow JIRA] (https://issues.apache.org/jira/browse/AIRFLOW/) issues and references them in the PR title. For example, "[AIRFLOW-XXX] My Airflow PR" - https://issues.apache.org/jira/browse/AIRFLOW-1007 ### Description - [x] I changed Jinja to use the ImmutableSandboxedEnvironment, and used literal_eval, to limit the amount of RCE. ### Tests - [x] My PR adds the following unit tests: SecurityTest chart_data tests ### Commits - [x] My commits all reference JIRA issues in their subject lines, and I have squashed multiple commits if they address the same issue. In addition, my commits follow the guidelines from "[How to write a good git commit message](http://chris.beams.io/posts/git- commit/)": 1. Subject is separated from body by a blank line 2. Subject is limited to 50 characters 3. Subject does not end with a period 4. Subject uses the imperative mood ("add", not "adding") 5. Body wraps at 72 characters 6. 
Body explains "what" and "why", not "how" to: aoen plypaul artwr bolkedebruin Closes #2184 from saguziel/aguziel-jinja-2 Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/daa281c0 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/daa281c0 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/daa281c0 Branch: refs/heads/master Commit: daa281c0364609d6812921123cf47e4118b40484 Parents: b55f41f Author: Alex Guziel Authored: Mon Apr 3 12:16:00 2017 -0700 Committer: Arthur Wiedmer Committed: Mon Apr 3 12:16:00 2017 -0700 -- airflow/www/views.py | 7 --- tests/core.py| 38 ++ 2 files changed, 42 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/daa281c0/airflow/www/views.py -- diff --git a/airflow/www/views.py b/airflow/www/views.py index 0def0a9..a9bab31 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -43,7 +43,7 @@ from flask_admin.tools import iterdecode from flask_login import flash from flask._compat import PY2 -import jinja2 +from jinja2.sandbox import ImmutableSandboxedEnvironment import markdown import nvd3 @@ -328,8 +328,9 @@ class Airflow(BaseView): request_dict = {k: request.args.get(k) for k in request.args} args.update(request_dict) args['macros'] = macros -sql = jinja2.Template(chart.sql).render(**args) -label = jinja2.Template(chart.label).render(**args) +sandbox = ImmutableSandboxedEnvironment() +sql = sandbox.from_string(chart.sql).render(**args) +label = sandbox.from_string(chart.label).render(**args) payload['sql_html'] = Markup(highlight( sql, lexers.SqlLexer(), # Lexer call http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/daa281c0/tests/core.py -- diff --git a/tests/core.py b/tests/core.py index bd52d19..997bb42 100644 --- a/tests/core.py +++ b/tests/core.py @@ -63,6 +63,8 @@ from airflow.utils.logging import LoggingMixin from lxml import html from 
airflow.exceptions import AirflowException from airflow.configuration import AirflowConfigException, run_command +from jinja2.sandbox import SecurityError +from jinja2 import UndefinedError import six @@ -1469,6 +1471,42 @@ class SecurityTests(unittest.TestCase): response = self.app.get("/admin/log", follow_redirects=True) self.assertIn(bleach.clean("alert(123456)"), response.data.decode('UTF-8')) +def test_chart_data_template(self): +"""Protect chart_data from being able to do RCE.""" +session = settings.Session() +Chart = models.Chart +chart1 = Chart( +label='insecure_chart', +
[2/2] incubator-airflow git commit: Merge pull request #2185 from saguziel/aguziel-celery-fix
Merge pull request #2185 from saguziel/aguziel-celery-fix Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/75addb4a Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/75addb4a Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/75addb4a Branch: refs/heads/master Commit: 75addb4a9fba57df39d59d26d15831080fb30ef0 Parents: c64e876 34ee1dc Author: Arthur Wiedmer Authored: Mon Apr 3 15:42:16 2017 -0700 Committer: Arthur Wiedmer Committed: Mon Apr 3 15:42:16 2017 -0700 -- airflow/executors/celery_executor.py | 3 +++ 1 file changed, 3 insertions(+) --
[1/2] incubator-airflow git commit: [AIRFLOW-1038] Specify celery serialization options explicitly
Repository: incubator-airflow Updated Branches: refs/heads/master c64e876bd -> 75addb4a9 [AIRFLOW-1038] Specify celery serialization options explicitly Specify the CELERY_TASK_SERIALIZER and CELERY_RESULT_SERIALIZER as pickle explicitly, and CELERY_EVENT_SERIALIZER as json. Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/34ee1dc0 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/34ee1dc0 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/34ee1dc0 Branch: refs/heads/master Commit: 34ee1dc0373708f7db0a562ac470338c6126d20a Parents: b2b9587 Author: Alex Guziel Authored: Fri Mar 24 11:51:39 2017 -0700 Committer: Alex Guziel Committed: Mon Apr 3 15:33:56 2017 -0700 -- airflow/executors/celery_executor.py | 3 +++ 1 file changed, 3 insertions(+) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/34ee1dc0/airflow/executors/celery_executor.py -- diff --git a/airflow/executors/celery_executor.py b/airflow/executors/celery_executor.py index 04414fb..e0c94c1 100644 --- a/airflow/executors/celery_executor.py +++ b/airflow/executors/celery_executor.py @@ -36,6 +36,9 @@ DEFAULT_QUEUE = configuration.get('celery', 'DEFAULT_QUEUE') class CeleryConfig(object): CELERY_ACCEPT_CONTENT = ['json', 'pickle'] +CELERY_EVENT_SERIALIZER = 'json' +CELERY_RESULT_SERIALIZER = 'pickle' +CELERY_TASK_SERIALIZER = 'pickle' CELERYD_PREFETCH_MULTIPLIER = 1 CELERY_ACKS_LATE = True BROKER_URL = configuration.get('celery', 'BROKER_URL')
incubator-airflow git commit: [AIRFLOW-1067] use example.com in examples
Repository: incubator-airflow Updated Branches: refs/heads/master 8fdfb16cc -> 70f1bf10a [AIRFLOW-1067] use example.com in examples We use airf...@airflow.com in examples. However, https://airflow.com is owned by a company named Airflow (selling fans, etc). We should use airf...@example.com instead. That domain is created for this purpose. Closes #2217 from mengxr/AIRFLOW-1067 Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/70f1bf10 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/70f1bf10 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/70f1bf10 Branch: refs/heads/master Commit: 70f1bf10a5a5ab8f7460d3c0dc5c1a6d955355de Parents: 8fdfb16 Author: Xiangrui Meng Authored: Tue Apr 4 09:22:37 2017 -0700 Committer: Arthur Wiedmer Committed: Tue Apr 4 09:22:37 2017 -0700 -- airflow/api/auth/backend/default.py| 2 +- airflow/config_templates/default_airflow.cfg | 2 +- airflow/config_templates/default_test.cfg | 2 +- .../example_dags/example_emr_job_flow_automatic_steps.py | 2 +- .../contrib/example_dags/example_emr_job_flow_manual_steps.py | 2 +- airflow/contrib/example_dags/example_qubole_operator.py| 2 +- airflow/contrib/example_dags/example_twitter_dag.py| 2 +- airflow/contrib/task_runner/__init__.py| 2 +- airflow/dag/__init__.py| 2 +- airflow/example_dags/docker_copy_data.py | 2 +- airflow/example_dags/example_docker_operator.py| 2 +- airflow/example_dags/example_http_operator.py | 2 +- airflow/example_dags/tutorial.py | 2 +- docs/scheduler.rst | 2 +- docs/tutorial.rst | 6 +++--- scripts/ci/airflow_travis.cfg | 2 +- tests/dags/test_retry_handling_job.py | 2 +- 17 files changed, 19 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/70f1bf10/airflow/api/auth/backend/default.py -- diff --git a/airflow/api/auth/backend/default.py b/airflow/api/auth/backend/default.py index 64cae86..49453ea 100644 --- 
a/airflow/api/auth/backend/default.py +++ b/airflow/api/auth/backend/default.py @@ -26,4 +26,4 @@ def requires_authentication(function): def decorated(*args, **kwargs): return function(*args, **kwargs) -return decorated \ No newline at end of file +return decorated http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/70f1bf10/airflow/config_templates/default_airflow.cfg -- diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg index 77c65ca..b28256a 100644 --- a/airflow/config_templates/default_airflow.cfg +++ b/airflow/config_templates/default_airflow.cfg @@ -231,7 +231,7 @@ smtp_ssl = False # smtp_user = airflow # smtp_password = airflow smtp_port = 25 -smtp_mail_from = airf...@airflow.com +smtp_mail_from = airf...@example.com [celery] http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/70f1bf10/airflow/config_templates/default_test.cfg -- diff --git a/airflow/config_templates/default_test.cfg b/airflow/config_templates/default_test.cfg index 2d31141..2fb5bb0 100644 --- a/airflow/config_templates/default_test.cfg +++ b/airflow/config_templates/default_test.cfg @@ -65,7 +65,7 @@ smtp_host = localhost smtp_user = airflow smtp_port = 25 smtp_password = airflow -smtp_mail_from = airf...@airflow.com +smtp_mail_from = airf...@example.com [celery] celery_app_name = airflow.executors.celery_executor http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/70f1bf10/airflow/contrib/example_dags/example_emr_job_flow_automatic_steps.py -- diff --git a/airflow/contrib/example_dags/example_emr_job_flow_automatic_steps.py b/airflow/contrib/example_dags/example_emr_job_flow_automatic_steps.py index 7f57ad1..b03b36f 100644 --- a/airflow/contrib/example_dags/example_emr_job_flow_automatic_steps.py +++ b/airflow/contrib/example_dags/example_emr_job_flow_automatic_steps.py @@ -22,7 +22,7 @@ DEFAULT_ARGS = { 'owner': 'airflow', 'depends_on_past': False, 'start_date': airflow.utils.dates.days_ago(2), -'email': 
['airflow@airflow.com'], +&
[2/2] incubator-airflow git commit: Merge pull request #2128 from artwr/artwr-improve_presto_hook_error_when_cluster_is_unavailable
Merge pull request #2128 from artwr/artwr-improve_presto_hook_error_when_cluster_is_unavailable Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/f5462c78 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/f5462c78 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/f5462c78 Branch: refs/heads/master Commit: f5462c78ff38ec59ec30c688097ff5bb3b3541bb Parents: 70f1bf1 6dd4b3b Author: Arthur Wiedmer Authored: Tue Apr 4 11:20:54 2017 -0700 Committer: Arthur Wiedmer Committed: Tue Apr 4 11:20:54 2017 -0700 -- airflow/hooks/presto_hook.py | 26 -- 1 file changed, 16 insertions(+), 10 deletions(-) --
[1/2] incubator-airflow git commit: [AIRFLOW-947] Improve exceptions for unavailable Presto cluster
Repository: incubator-airflow Updated Branches: refs/heads/master 70f1bf10a -> f5462c78f [AIRFLOW-947] Improve exceptions for unavailable Presto cluster This improves error logging when the Presto cluster is unavailable and the underlying error is a 503 http response. This introspects the error to prevent trying to access the 'message' attribute when not present. Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/6dd4b3bc Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/6dd4b3bc Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/6dd4b3bc Branch: refs/heads/master Commit: 6dd4b3bc1f366e1f4f4b42d6781f1caee7a5827a Parents: 70f1bf1 Author: Arthur Wiedmer Authored: Mon Mar 6 21:06:13 2017 -0800 Committer: Arthur Wiedmer Committed: Tue Apr 4 09:37:22 2017 -0700 -- airflow/hooks/presto_hook.py | 26 -- 1 file changed, 16 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/6dd4b3bc/airflow/hooks/presto_hook.py -- diff --git a/airflow/hooks/presto_hook.py b/airflow/hooks/presto_hook.py index f8f6ac8..768ff3f 100644 --- a/airflow/hooks/presto_hook.py +++ b/airflow/hooks/presto_hook.py @@ -54,6 +54,19 @@ class PrestoHook(DbApiHook): def _strip_sql(sql): return sql.strip().rstrip(';') +def _get_pretty_exception_message(self, e): +""" +Parses some DatabaseError to provide a better error message +""" +if (hasattr(e, 'message') +and 'errorName' in e.message +and 'message' in e.message): +return ('{name}: {message}'.format( +name=e.message['errorName'], +message=e.message['message'])) +else: +return str(e) + def get_records(self, hql, parameters=None): """ Get a set of records from Presto @@ -62,14 +75,7 @@ class PrestoHook(DbApiHook): return super(PrestoHook, self).get_records( self._strip_sql(hql), parameters) except DatabaseError as e: -if (hasattr(e, 'message') and -'errorName' in e.message and -'message' in 
e.message): -# Use the structured error data in the raised exception -raise PrestoException('{name}: {message}'.format( -name=e.message['errorName'], message=e.message['message'])) -else: -raise PrestoException(str(e)) +raise PrestoException(self._parse_exception_message(e)) def get_first(self, hql, parameters=None): """ @@ -80,7 +86,7 @@ class PrestoHook(DbApiHook): return super(PrestoHook, self).get_first( self._strip_sql(hql), parameters) except DatabaseError as e: -raise PrestoException(e[0]['message']) +raise PrestoException(self._parse_exception_message(e)) def get_pandas_df(self, hql, parameters=None): """ @@ -92,7 +98,7 @@ class PrestoHook(DbApiHook): cursor.execute(self._strip_sql(hql), parameters) data = cursor.fetchall() except DatabaseError as e: -raise PrestoException(e[0]['message']) +raise PrestoException(self._parse_exception_message(e)) column_descriptions = cursor.description if data: df = pandas.DataFrame(data)
incubator-airflow git commit: [AIRFLOW-1016] Allow HTTP HEAD request method on HTTPSensor
Repository: incubator-airflow Updated Branches: refs/heads/master 0f7ddbbed -> 4c41f6e96 [AIRFLOW-1016] Allow HTTP HEAD request method on HTTPSensor This PR provides the HEAD http method on top of GET. This is useful for getting responses without a body, and provides a lighter weight response. Closes #2175 from msempere/AIRFLOW-1016/allow- http-head-request-method-on-httpsensor Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/4c41f6e9 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/4c41f6e9 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/4c41f6e9 Branch: refs/heads/master Commit: 4c41f6e96e0fafd9eb409fa20fb5e62f70fa7f17 Parents: 0f7ddbb Author: MSempere Authored: Wed Apr 5 08:09:32 2017 -0700 Committer: Arthur Wiedmer Committed: Wed Apr 5 08:10:20 2017 -0700 -- airflow/hooks/http_hook.py | 7 +++- airflow/operators/sensors.py | 7 +++- tests/operators/sensors.py | 73 +++ 3 files changed, 85 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/4c41f6e9/airflow/hooks/http_hook.py -- diff --git a/airflow/hooks/http_hook.py b/airflow/hooks/http_hook.py index 7cf9a24..041328f 100644 --- a/airflow/hooks/http_hook.py +++ b/airflow/hooks/http_hook.py @@ -66,6 +66,11 @@ class HttpHook(BaseHook): url, params=data, headers=headers) +elif self.method == 'HEAD': +# HEAD doesn't use params +req = requests.Request(self.method, + url, + headers=headers) else: # Others use data req = requests.Request(self.method, @@ -100,7 +105,7 @@ class HttpHook(BaseHook): # to get reason and code for failure by checking first 3 chars # for the code, or do a split on ':' logging.error("HTTP error: " + response.reason) -if self.method != 'GET': +if self.method not in ('GET', 'HEAD'): # The sensor uses GET, so this prevents filling up the log # with the body every time the GET 'misses'. 
# That's ok to do, because GETs should be repeatable and http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/4c41f6e9/airflow/operators/sensors.py -- diff --git a/airflow/operators/sensors.py b/airflow/operators/sensors.py index ae50bc5..b561b49 100644 --- a/airflow/operators/sensors.py +++ b/airflow/operators/sensors.py @@ -629,6 +629,8 @@ class HttpSensor(BaseSensorOperator): :param http_conn_id: The connection to run the sensor against :type http_conn_id: string +:param method: The HTTP request method to use +:type method: string :param endpoint: The relative part of the full url :type endpoint: string :param params: The parameters to be added to the GET url @@ -650,6 +652,7 @@ class HttpSensor(BaseSensorOperator): def __init__(self, endpoint, http_conn_id='http_default', + method='GET', params=None, headers=None, response_check=None, @@ -662,7 +665,9 @@ class HttpSensor(BaseSensorOperator): self.extra_options = extra_options or {} self.response_check = response_check -self.hook = HttpHook(method='GET', http_conn_id=http_conn_id) +self.hook = HttpHook( +method=method, +http_conn_id=http_conn_id) def poke(self, context): logging.info('Poking: ' + self.endpoint) http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/4c41f6e9/tests/operators/sensors.py -- diff --git a/tests/operators/sensors.py b/tests/operators/sensors.py index e77216b..ea1e6cc 100644 --- a/tests/operators/sensors.py +++ b/tests/operators/sensors.py @@ -19,6 +19,7 @@ import sys import time import unittest +from mock import patch from datetime import datetime, timedelta from airflow import DAG, configuration @@ -94,6 +95,14 @@ class SensorTimeoutTest(unittest.TestCase): class HttpSensorTests(unittest.TestCase): +def setUp(self): +configuration.load_test_config() +args = { +'owner': 'airflow', +'start_date': DEFAULT_DATE +}
incubator-airflow git commit: [AIRFLOW-1028] Databricks Operator for Airflow
Repository: incubator-airflow Updated Branches: refs/heads/master 5a6f18f1c -> 53ca50845 [AIRFLOW-1028] Databricks Operator for Airflow Add DatabricksSubmitRun Operator In this PR, we contribute a DatabricksSubmitRun operator and a Databricks hook. This operator enables easy integration of Airflow with Databricks. In addition to the operator, we have created a databricks_default connection, an example_dag using this DatabricksSubmitRunOperator, and matching documentation. Closes #2202 from andrewmchen/databricks-operator- squashed Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/53ca5084 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/53ca5084 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/53ca5084 Branch: refs/heads/master Commit: 53ca5084561fd5c13996609f2eda6baf717249b5 Parents: 5a6f18f Author: Andrew Chen Authored: Thu Apr 6 08:30:01 2017 -0700 Committer: Arthur Wiedmer Committed: Thu Apr 6 08:30:33 2017 -0700 -- .../example_dags/example_databricks_operator.py | 82 +++ airflow/contrib/hooks/databricks_hook.py| 202 + .../contrib/operators/databricks_operator.py| 211 + airflow/exceptions.py | 2 +- airflow/models.py | 1 + airflow/utils/db.py | 4 + docs/code.rst | 1 + docs/integration.rst| 13 ++ setup.py| 2 + tests/contrib/hooks/databricks_hook.py | 226 +++ tests/contrib/operators/databricks_operator.py | 185 +++ 11 files changed, 928 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/53ca5084/airflow/contrib/example_dags/example_databricks_operator.py -- diff --git a/airflow/contrib/example_dags/example_databricks_operator.py b/airflow/contrib/example_dags/example_databricks_operator.py new file mode 100644 index 000..abf6844 --- /dev/null +++ b/airflow/contrib/example_dags/example_databricks_operator.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# +# Licensed under the Apache License, 
Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import airflow + +from airflow import DAG +from airflow.contrib.operators.databricks_operator import DatabricksSubmitRunOperator + +# This is an example DAG which uses the DatabricksSubmitRunOperator. +# In this example, we create two tasks which execute sequentially. +# The first task is to run a notebook at the workspace path "/test" +# and the second task is to run a JAR uploaded to DBFS. Both +# tasks use new clusters. +# +# Because we have set a downstream dependency on the notebook task, +# the spark jar task will NOT run until the notebook task completes +# successfully. +# +# The definition of a successful run is if the run has a result_state of "SUCCESS". +# For more information about the state of a run refer to +# https://docs.databricks.com/api/latest/jobs.html#runstate + +args = { +'owner': 'airflow', +'email': ['airf...@example.com'], +'depends_on_past': False, +'start_date': airflow.utils.dates.days_ago(2) +} + +dag = DAG( +dag_id='example_databricks_operator', default_args=args, +schedule_interval='@daily') + +new_cluster = { +'spark_version': '2.1.0-db3-scala2.11', +'node_type_id': 'r3.xlarge', +'aws_attributes': { +'availability': 'ON_DEMAND' +}, +'num_workers': 8 +} + +notebook_task_params = { +'new_cluster': new_cluster, +'notebook_task': { +'notebook_path': '/Users/airf...@example.com/PrepareData', +}, +} +# Example of using the JSON parameter to initialize the operator. 
+notebook_task = DatabricksSubmitRunOperator( +task_id='notebook_task', +dag=dag, +json=notebook_task_params) + +# Example of using the named parameters of DatabricksSubmitRunOperator +# to initialize the operator
[1/2] incubator-airflow git commit: Move presto.execute inside try catch to handle error
Repository: incubator-airflow Updated Branches: refs/heads/master db07e04f9 -> 6f4696ba2 Move presto.execute inside try catch to handle error This commit fixes an issue where malformed SQL would raise a DatabaseError outside of the try catch block in the hook. This should now raise a PrestoException as expected. Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/d18a782f Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/d18a782f Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/d18a782f Branch: refs/heads/master Commit: d18a782f3beb8b6f55c85ea0c4d9894dc5550b0c Parents: 31f01b8 Author: Arthur Wiedmer Authored: Thu May 12 16:15:15 2016 -0700 Committer: Arthur Wiedmer Committed: Thu May 12 16:15:15 2016 -0700 -- airflow/hooks/presto_hook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/d18a782f/airflow/hooks/presto_hook.py -- diff --git a/airflow/hooks/presto_hook.py b/airflow/hooks/presto_hook.py index c445255..2acdef1 100644 --- a/airflow/hooks/presto_hook.py +++ b/airflow/hooks/presto_hook.py @@ -74,8 +74,8 @@ class PrestoHook(DbApiHook): """ import pandas cursor = self.get_cursor() -cursor.execute(self._strip_sql(hql), parameters) try: +cursor.execute(self._strip_sql(hql), parameters) data = cursor.fetchall() except DatabaseError as e: obj = eval(str(e))
[2/2] incubator-airflow git commit: [AIRFLOW-109] Fix try catch handling in PrestoHook
[AIRFLOW-109] Fix try catch handling in PrestoHook This addresses the issue with executing the SQL statement outside of the try block. In the case of a syntax error in the statement, the underlying library raises a DatabaseError which was meant to be handled (i.e., json parsed) by the catch. Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/6f4696ba Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/6f4696ba Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/6f4696ba Branch: refs/heads/master Commit: 6f4696ba2ef18d74be8c18080b8ea7b9419608fb Parents: db07e04 d18a782 Author: Arthur Wiedmer Authored: Mon May 16 14:12:12 2016 -0700 Committer: Arthur Wiedmer Committed: Mon May 16 14:12:12 2016 -0700 -- airflow/hooks/presto_hook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --
incubator-airflow git commit: Revert "[AIRFLOW-179] DbApiHook string serialization fails when string contains non-ASCII characters"
Repository: incubator-airflow Updated Branches: refs/heads/master 87b4b8fa1 -> 8f6364058 Revert "[AIRFLOW-179] DbApiHook string serialization fails when string contains non-ASCII characters" This reverts commit 87b4b8fa19cb660317198d74f6d51fdde0a7e067. Reverting as the method used in the dbapi hook is actually package specific to MySQLdb and would break the sqlite and mssql hooks. Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/8f636405 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/8f636405 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/8f636405 Branch: refs/heads/master Commit: 8f63640584ca2dcd15bcd361d1f9a0d995bad315 Parents: 87b4b8f Author: Arthur Wiedmer Authored: Fri May 27 11:38:57 2016 -0700 Committer: Arthur Wiedmer Committed: Fri May 27 11:38:57 2016 -0700 -- airflow/hooks/dbapi_hook.py | 21 - 1 file changed, 20 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/8f636405/airflow/hooks/dbapi_hook.py -- diff --git a/airflow/hooks/dbapi_hook.py b/airflow/hooks/dbapi_hook.py index 9e128a2..e5de92e 100644 --- a/airflow/hooks/dbapi_hook.py +++ b/airflow/hooks/dbapi_hook.py @@ -1,5 +1,8 @@ +from builtins import str from past.builtins import basestring +from datetime import datetime +import numpy import logging from airflow.hooks.base_hook import BaseHook @@ -168,7 +171,10 @@ class DbApiHook(BaseHook): i = 0 for row in rows: i += 1 -values = [conn.literal(cell) for cell in row] +l = [] +for cell in row: +l.append(self._serialize_cell(cell)) +values = tuple(l) sql = "INSERT INTO {0} {1} VALUES ({2});".format( table, target_fields, @@ -184,6 +190,19 @@ class DbApiHook(BaseHook): logging.info( "Done loading. 
Loaded a total of {i} rows".format(**locals())) +@staticmethod +def _serialize_cell(cell): +if isinstance(cell, basestring): +return "'" + str(cell).replace("'", "''") + "'" +elif cell is None: +return 'NULL' +elif isinstance(cell, numpy.datetime64): +return "'" + str(cell) + "'" +elif isinstance(cell, datetime): +return "'" + cell.isoformat() + "'" +else: +return str(cell) + def bulk_dump(self, table, tmp_file): """ Dumps a database table into a tab-delimited file
[1/2] incubator-airflow git commit: [AIRFLOW-180] Fix timeout behavior for sensors
Repository: incubator-airflow Updated Branches: refs/heads/master 45b735bae -> 26c31d9bc [AIRFLOW-180] Fix timeout behavior for sensors In the previous state of the code, datetime.now was compared to started_at and seconds was pulled out. It turns out that the seconds attribute of a timedelta has a maximum of 86400 and the rolls up to 1 day. The unintended consequence is that timeout larger than 86400 are ignored, with sensors running forever. To fix this we use the total_seconds method to get at the real timedelta in seconds. Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/c38a5c2a Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/c38a5c2a Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/c38a5c2a Branch: refs/heads/master Commit: c38a5c2a8b227194ec52d81e8a5a85c97751ecd9 Parents: 45b735b Author: Arthur Wiedmer Authored: Thu May 26 10:27:55 2016 -0700 Committer: Arthur Wiedmer Committed: Mon Jun 20 15:45:44 2016 -0700 -- airflow/operators/sensors.py | 4 +- tests/operators/sensors.py | 77 ++- 2 files changed, 77 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/c38a5c2a/airflow/operators/sensors.py -- diff --git a/airflow/operators/sensors.py b/airflow/operators/sensors.py index 5276f6e..4e4cb3b 100644 --- a/airflow/operators/sensors.py +++ b/airflow/operators/sensors.py @@ -69,12 +69,12 @@ class BaseSensorOperator(BaseOperator): def execute(self, context): started_at = datetime.now() while not self.poke(context): -sleep(self.poke_interval) -if (datetime.now() - started_at).seconds > self.timeout: +if (datetime.now() - started_at).total_seconds() > self.timeout: if self.soft_fail: raise AirflowSkipException('Snap. Time is OUT.') else: raise AirflowSensorTimeout('Snap. Time is OUT.') +sleep(self.poke_interval) logging.info("Success criteria met. 
Exiting.") http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/c38a5c2a/tests/operators/sensors.py -- diff --git a/tests/operators/sensors.py b/tests/operators/sensors.py index 025790e..325ee8d 100644 --- a/tests/operators/sensors.py +++ b/tests/operators/sensors.py @@ -12,11 +12,84 @@ # See the License for the specific language governing permissions and # limitations under the License. + +import logging import os +import time import unittest -from airflow.operators.sensors import HttpSensor -from airflow.exceptions import AirflowException +from datetime import datetime, timedelta + +from airflow import DAG, configuration +from airflow.operators.sensors import HttpSensor, BaseSensorOperator +from airflow.utils.decorators import apply_defaults +from airflow.exceptions import (AirflowException, +AirflowSensorTimeout, +AirflowSkipException) +configuration.test_mode() + +DEFAULT_DATE = datetime(2015, 1, 1) +TEST_DAG_ID = 'unit_test_dag' + + +class TimeoutTestSensor(BaseSensorOperator): +""" +Sensor that always returns the return_value provided + +:param return_value: Set to true to mark the task as SKIPPED on failure +:type return_value: any +""" + +@apply_defaults +def __init__( +self, +return_value=False, +*args, +**kwargs): +self.return_value = return_value +super(TimeoutTestSensor, self).__init__(*args, **kwargs) + +def poke(self, context): +return self.return_value + +def execute(self, context): +started_at = datetime.now() +time_jump = self.params.get('time_jump') +while not self.poke(context): +if time_jump: +started_at -= time_jump +if (datetime.now() - started_at).total_seconds() > self.timeout: +if self.soft_fail: +raise AirflowSkipException('Snap. Time is OUT.') +else: +raise AirflowSensorTimeout('Snap. Time is OUT.') +time.sleep(self.poke_interval) +logging.info("Success criteria met. 
Exiting.") + + +class SensorTimeoutTest(unittest.TestCase): +def setUp(self): +configuration.test_mode() +args = { +'owner': 'airflow', +'start_date': DEFAULT_DATE +} +
[2/2] incubator-airflow git commit: Merge pull request #1547 from artwr/artwr_fix_sensor_timeout
Merge pull request #1547 from artwr/artwr_fix_sensor_timeout Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/26c31d9b Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/26c31d9b Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/26c31d9b Branch: refs/heads/master Commit: 26c31d9bca16a88c915d0d501aaa58915056a2a9 Parents: 45b735b c38a5c2 Author: Arthur Wiedmer Authored: Mon Jun 20 16:02:14 2016 -0700 Committer: Arthur Wiedmer Committed: Mon Jun 20 16:02:14 2016 -0700 -- airflow/operators/sensors.py | 4 +- tests/operators/sensors.py | 77 ++- 2 files changed, 77 insertions(+), 4 deletions(-) --
incubator-airflow git commit: [AIRFLOW-264] Adding workload management for Hive
Repository: incubator-airflow Updated Branches: refs/heads/master 8b86ee6a7 -> 92064398c [AIRFLOW-264] Adding workload management for Hive Dear Airflow Maintainers, Please accept this PR that addresses the following issues: - https://issues.apache.org/jira/browse/AIRFLOW-264 CC: Original PR by Jparks2532 https://github.com/apache/incubator-airflow/pull/1384 Add workload management to the hive hook and operator. Edited operator_helper to avoid KeyError on retrieving conf values. Refactored hive_cli command preparation in a separate private method. Added a small helper to flatten one level of an iterator to a list. Closes #1614 from artwr/artwr_fixing_hive_queue_PR Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/92064398 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/92064398 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/92064398 Branch: refs/heads/master Commit: 92064398c4c982a310925da376745a1713bf96e2 Parents: 8b86ee6 Author: Arthur Wiedmer Authored: Wed Jul 6 12:43:12 2016 -0700 Committer: Arthur Wiedmer Committed: Wed Jul 6 12:43:44 2016 -0700 -- airflow/hooks/hive_hooks.py| 183 +++- airflow/operators/hive_operator.py | 22 +++- airflow/utils/helpers.py | 10 ++ airflow/utils/operator_helpers.py | 10 +- tests/operators/hive_operator.py | 12 ++- tests/operators/operators.py | 2 +- 6 files changed, 179 insertions(+), 60 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/92064398/airflow/hooks/hive_hooks.py -- diff --git a/airflow/hooks/hive_hooks.py b/airflow/hooks/hive_hooks.py index eaad390..a9fac48 100644 --- a/airflow/hooks/hive_hooks.py +++ b/airflow/hooks/hive_hooks.py @@ -17,19 +17,25 @@ from __future__ import print_function from builtins import zip from past.builtins import basestring +import collections import unicodecsv as csv +import itertools import logging import re import subprocess +import 
time from tempfile import NamedTemporaryFile import hive_metastore from airflow.exceptions import AirflowException from airflow.hooks.base_hook import BaseHook +from airflow.utils.helpers import as_flattened_list from airflow.utils.file import TemporaryDirectory from airflow import configuration import airflow.security.utils as utils +HIVE_QUEUE_PRIORITIES = ['VERY_HIGH', 'HIGH', 'NORMAL', 'LOW', 'VERY_LOW'] + class HiveCliHook(BaseHook): @@ -47,12 +53,24 @@ class HiveCliHook(BaseHook): The extra connection parameter ``auth`` gets passed as in the ``jdbc`` connection string as is. + +:param mapred_queue: queue used by the Hadoop Scheduler (Capacity or Fair) +:type mapred_queue: string +:param mapred_queue_priority: priority within the job queue. +Possible settings include: VERY_HIGH, HIGH, NORMAL, LOW, VERY_LOW +:type mapred_queue_priority: string +:param mapred_job_name: This name will appear in the jobtracker. +This can make monitoring easier. +:type mapred_job_name: string """ def __init__( self, hive_cli_conn_id="hive_cli_default", -run_as=None): +run_as=None, +mapred_queue=None, +mapred_queue_priority=None, +mapred_job_name=None): conn = self.get_connection(hive_cli_conn_id) self.hive_cli_params = conn.extra_dejson.get('hive_cli_params', '') self.use_beeline = conn.extra_dejson.get('use_beeline', False) @@ -60,16 +78,92 @@ class HiveCliHook(BaseHook): self.conn = conn self.run_as = run_as +if mapred_queue_priority: +mapred_queue_priority = mapred_queue_priority.upper() +if mapred_queue_priority not in HIVE_QUEUE_PRIORITIES: +raise AirflowException( +"Invalid Mapred Queue Priority. 
Valid values are: " +"{}".format(', '.join(HIVE_QUEUE_PRIORITIES))) + +self.mapred_queue = mapred_queue +self.mapred_queue_priority = mapred_queue_priority +self.mapred_job_name = mapred_job_name + +def _prepare_cli_cmd(self): +""" +This function creates the command list from available information +""" +conn = self.conn +hive_bin = 'hive' +cmd_extra = [] + +if self.use_beeline: +hive_bin = 'beeline' +jdbc_url = "jdbc:hive2://{conn.host}:{conn.port}/{conn.schema}" +
[2/2] incubator-airflow git commit: Merge pull request #1402 from lauralorenz/schedule_interval_default_args_docs
Merge pull request #1402 from lauralorenz/schedule_interval_default_args_docs Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/916f1eb2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/916f1eb2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/916f1eb2 Branch: refs/heads/master Commit: 916f1eb2feedae4f4d827466cfe91821ef30f885 Parents: 0235d59 80d3c8d Author: Arthur Wiedmer Authored: Mon Oct 17 09:46:57 2016 -0700 Committer: Arthur Wiedmer Committed: Mon Oct 17 09:46:57 2016 -0700 -- airflow/example_dags/tutorial.py | 14 -- docs/faq.rst | 5 + 2 files changed, 17 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/916f1eb2/airflow/example_dags/tutorial.py -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/916f1eb2/docs/faq.rst --
[1/2] incubator-airflow git commit: [AIRFLOW-575] Clarify tutorial and FAQ about `schedule_interval` always inheriting from DAG object
Repository: incubator-airflow Updated Branches: refs/heads/master 0235d59d0 -> 916f1eb2f [AIRFLOW-575] Clarify tutorial and FAQ about `schedule_interval` always inheriting from DAG object - Update the tutorial with a comment helping to explain the use of default_args and include all the possible parameters in line - Clarify in the FAQ the possibility of an unexpected default `schedule_interval`in case airflow users mistakenly try to overwrite the default `schedule_interval` in a DAG's `default_args` parameter Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/80d3c8d4 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/80d3c8d4 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/80d3c8d4 Branch: refs/heads/master Commit: 80d3c8d461f1c95d173aa72a055737d8ad379ae1 Parents: 11ad53a Author: lauralorenz Authored: Tue Apr 19 17:03:46 2016 -0400 Committer: lauralorenz Committed: Mon Oct 17 12:36:38 2016 -0400 -- airflow/example_dags/tutorial.py | 14 -- docs/faq.rst | 5 + 2 files changed, 17 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/80d3c8d4/airflow/example_dags/tutorial.py -- diff --git a/airflow/example_dags/tutorial.py b/airflow/example_dags/tutorial.py index 9462463..e929389 100644 --- a/airflow/example_dags/tutorial.py +++ b/airflow/example_dags/tutorial.py @@ -10,6 +10,8 @@ from datetime import datetime, timedelta seven_days_ago = datetime.combine(datetime.today() - timedelta(7), datetime.min.time()) +# these args will get passed on to each operator +# you can override them on a per-task basis during operator initialization default_args = { 'owner': 'airflow', 'depends_on_past': False, @@ -22,11 +24,19 @@ default_args = { # 'queue': 'bash_queue', # 'pool': 'backfill', # 'priority_weight': 10, -# 'schedule_interval': timedelta(1), # 'end_date': datetime(2016, 1, 1), +# 'wait_for_downstream': 
False, +# 'dag': dag, +# 'adhoc':False, +# 'sla': timedelta(hours=2), +# 'execution_timeout': timedelta(seconds=300), +# 'on_failure_callback': some_function, +# 'on_success_callback': some_other_function, +# 'on_retry_callback': another_function, +# 'trigger_rule': u'all_success' } -dag = DAG('tutorial', default_args=default_args) +dag = DAG('tutorial', default_args=default_args, schedule_interval=timedelta(days=1)) # t1, t2 and t3 are examples of tasks created by instantiating operators t1 = BashOperator( http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/80d3c8d4/docs/faq.rst -- diff --git a/docs/faq.rst b/docs/faq.rst index 6418dcb..b5b28af 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -17,6 +17,11 @@ Here are some of the common causes: - Is your ``start_date`` set properly? The Airflow scheduler triggers the task soon after the ``start_date + scheduler_interval`` is passed. +- Is your ``schedule_interval`` set properly? The default ``schedule_interval`` + is one day (``datetime.timedelta(1)``). You must specify a different ``schedule_interval`` + directly to the DAG object you instantiate, not as a ``default_param``, as task instances + do not override their parent DAG's ``schedule_interval``. + - Is your ``start_date`` beyond where you can see it in the UI? If you set your it to some time say 3 months ago, you won't be able to see it in the main view in the UI, but you should be able to see it in the
incubator-airflow git commit: closes apache/incubator-airflow#989 *no movement from submitter*
Repository: incubator-airflow Updated Branches: refs/heads/master 3c5d98082 -> fb4050579 closes apache/incubator-airflow#989 *no movement from submitter* Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/fb405057 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/fb405057 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/fb405057 Branch: refs/heads/master Commit: fb40505795d3f05378f907275f740b11762ceba1 Parents: 3c5d980 Author: Arthur Wiedmer Authored: Wed Oct 26 11:15:27 2016 -0700 Committer: Arthur Wiedmer Committed: Wed Oct 26 11:15:27 2016 -0700 -- --
incubator-airflow git commit: closes apache/incubator-airflow#908 *no movement from submitter*
Repository: incubator-airflow Updated Branches: refs/heads/master 8ec5f7f99 -> 3c5d98082 closes apache/incubator-airflow#908 *no movement from submitter* Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/3c5d9808 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/3c5d9808 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/3c5d9808 Branch: refs/heads/master Commit: 3c5d980822fa966b12d8809562b94699a31c0220 Parents: 8ec5f7f Author: Arthur Wiedmer Authored: Wed Oct 26 11:14:18 2016 -0700 Committer: Arthur Wiedmer Committed: Wed Oct 26 11:14:18 2016 -0700 -- --
incubator-airflow git commit: closes apache/incubator-airflow#1276 *no movement from submitter*
Repository: incubator-airflow Updated Branches: refs/heads/master fb4050579 -> f2f1b29a1 closes apache/incubator-airflow#1276 *no movement from submitter* Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/f2f1b29a Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/f2f1b29a Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/f2f1b29a Branch: refs/heads/master Commit: f2f1b29a1b16449d88ccd480048f1f18eccbbeb2 Parents: fb40505 Author: Arthur Wiedmer Authored: Wed Oct 26 11:21:28 2016 -0700 Committer: Arthur Wiedmer Committed: Wed Oct 26 11:21:28 2016 -0700 -- --
incubator-airflow git commit: closes apache/incubator-airflow#1274 *no movement from submitter*
Repository: incubator-airflow Updated Branches: refs/heads/master f2f1b29a1 -> 1425d7246 closes apache/incubator-airflow#1274 *no movement from submitter* Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/1425d724 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/1425d724 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/1425d724 Branch: refs/heads/master Commit: 1425d72468227486e002fa6406f1ee718cd73d67 Parents: f2f1b29 Author: Arthur Wiedmer Authored: Wed Oct 26 11:21:59 2016 -0700 Committer: Arthur Wiedmer Committed: Wed Oct 26 11:21:59 2016 -0700 -- --
incubator-airflow git commit: closes apache/incubator-airflow#1379 *obsolete*
Repository: incubator-airflow Updated Branches: refs/heads/master 64d7e3fde -> c1747b7bb closes apache/incubator-airflow#1379 *obsolete* Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/c1747b7b Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/c1747b7b Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/c1747b7b Branch: refs/heads/master Commit: c1747b7bb0cdfcd3f71dc00cda0eb39a36fcbabc Parents: 64d7e3f Author: Arthur Wiedmer Authored: Wed Oct 26 11:26:09 2016 -0700 Committer: Arthur Wiedmer Committed: Wed Oct 26 11:26:09 2016 -0700 -- --
incubator-airflow git commit: closes apache/incubator-airflow#1301 *obsolete*
Repository: incubator-airflow Updated Branches: refs/heads/master 1425d7246 -> 64d7e3fde closes apache/incubator-airflow#1301 *obsolete* Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/64d7e3fd Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/64d7e3fd Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/64d7e3fd Branch: refs/heads/master Commit: 64d7e3fdebec95d6f8a57a20afc28c19f373fc9e Parents: 1425d72 Author: Arthur Wiedmer Authored: Wed Oct 26 11:23:39 2016 -0700 Committer: Arthur Wiedmer Committed: Wed Oct 26 11:23:39 2016 -0700 -- --
incubator-airflow git commit: closes apache/incubator-airflow#1384 *obsolete*
Repository: incubator-airflow Updated Branches: refs/heads/master c1747b7bb -> 84cb7e809 closes apache/incubator-airflow#1384 *obsolete* Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/84cb7e80 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/84cb7e80 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/84cb7e80 Branch: refs/heads/master Commit: 84cb7e80959ad2896fa9c4f67f58c274c66c0823 Parents: c1747b7 Author: Arthur Wiedmer Authored: Wed Oct 26 11:26:59 2016 -0700 Committer: Arthur Wiedmer Committed: Wed Oct 26 11:26:59 2016 -0700 -- --
[1/2] incubator-airflow git commit: [AIRFLOW-137] Fix max_active_runs on clearing tasks
Repository: incubator-airflow Updated Branches: refs/heads/master 365af16b4 -> 0112f69fa [AIRFLOW-137] Fix max_active_runs on clearing tasks Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/804421b5 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/804421b5 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/804421b5 Branch: refs/heads/master Commit: 804421b53e8e447fd5c09348415b1f795ce560b5 Parents: fa977b6 Author: gtoonstra Authored: Sun Nov 6 17:25:06 2016 +0100 Committer: gtoonstra Committed: Wed Nov 16 18:57:04 2016 +0100 -- airflow/jobs.py | 8 ++-- tests/jobs.py | 39 +++ 2 files changed, 45 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/804421b5/airflow/jobs.py -- diff --git a/airflow/jobs.py b/airflow/jobs.py index 972f597..7eb4b99 100644 --- a/airflow/jobs.py +++ b/airflow/jobs.py @@ -793,8 +793,12 @@ class SchedulerJob(BaseJob): self.logger.info("Examining DAG run {}".format(run)) # don't consider runs that are executed in the future if run.execution_date > datetime.now(): -self.logging.error("Execution date is in future: {}" - .format(run.execution_date)) +self.logger.error("Execution date is in future: {}" + .format(run.execution_date)) +continue + +if len(active_dag_runs) >= dag.max_active_runs: +self.logger.info("Active dag runs > max_active_run.") continue # skip backfill dagruns for now as long as they are not really scheduled http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/804421b5/tests/jobs.py -- diff --git a/tests/jobs.py b/tests/jobs.py index af7ad61..5562177 100644 --- a/tests/jobs.py +++ b/tests/jobs.py @@ -701,6 +701,45 @@ class SchedulerJobTest(unittest.TestCase): new_dr = scheduler.create_dag_run(dag) self.assertIsNotNone(new_dr) +def test_scheduler_max_active_runs_respected_after_clear(self): +""" +Test if _process_task_instances only schedules ti's up 
to max_active_runs +(related to issue AIRFLOW-137) +""" +dag = DAG( +dag_id='test_scheduler_max_active_runs_respected_after_clear', +start_date=DEFAULT_DATE) +dag.max_active_runs = 3 + +dag_task1 = DummyOperator( +task_id='dummy', +dag=dag, +owner='airflow') + +session = settings.Session() +orm_dag = DagModel(dag_id=dag.dag_id) +session.merge(orm_dag) +session.commit() +session.close() + +scheduler = SchedulerJob() +dag.clear() + +# First create up to 3 dagruns in RUNNING state. +scheduler.create_dag_run(dag) + +# Reduce max_active_runs to 1 +dag.max_active_runs = 1 + +queue = mock.Mock() +# and schedule them in, so we can check how many +# tasks are put on the queue (should be one, not 3) +scheduler._process_task_instances(dag, queue=queue) + +queue.append.assert_called_with( +(dag.dag_id, dag_task1.task_id, DEFAULT_DATE) +) + def test_scheduler_auto_align(self): """ Test if the schedule_interval will be auto aligned with the start_date
[2/2] incubator-airflow git commit: Merge pull request #1870 from gtoonstra/maxactiveruns_fix
Merge pull request #1870 from gtoonstra/maxactiveruns_fix Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/0112f69f Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/0112f69f Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/0112f69f Branch: refs/heads/master Commit: 0112f69fa5f85271cfc1fd8597b8765a04575bb6 Parents: 365af16 804421b Author: Arthur Wiedmer Authored: Wed Nov 16 10:49:22 2016 -0800 Committer: Arthur Wiedmer Committed: Wed Nov 16 10:49:22 2016 -0800 -- airflow/jobs.py | 8 ++-- tests/jobs.py | 39 +++ 2 files changed, 45 insertions(+), 2 deletions(-) --
incubator-airflow git commit: [AIRFLOW-1235] Fix webserver's odd behaviour
Repository: incubator-airflow Updated Branches: refs/heads/master 8c42d03c4 -> 7e762d42d [AIRFLOW-1235] Fix webserver's odd behaviour In some cases, the gunicorn master shuts down but the webserver monitor process doesn't. This PR add timeout functionality to shutdown all related processes in such cases. Dear Airflow maintainers, Please accept this PR. I understand that it will not be reviewed until I have checked off all the steps below! ### JIRA - [x] My PR addresses the following [Airflow JIRA] (https://issues.apache.org/jira/browse/AIRFLOW/) issues and references them in the PR title. For example, "[AIRFLOW-XXX] My Airflow PR" - https://issues.apache.org/jira/browse/AIRFLOW-1235 ### Description - [x] Here are some details about my PR, including screenshots of any UI changes: In some cases, the gunicorn master shuts down but the webserver monitor process doesn't. This PR add timeout functionality to shutdown all related processes in such cases. ### Tests - [x] My PR adds the following unit tests __OR__ does not need testing for this extremely good reason: tests.core:CliTests.test_cli_webserver_shutdown_wh en_gunicorn_master_is_killed ### Commits - [x] My commits all reference JIRA issues in their subject lines, and I have squashed multiple commits if they address the same issue. In addition, my commits follow the guidelines from "[How to write a good git commit message](http://chris.beams.io/posts/git- commit/)": 1. Subject is separated from body by a blank line 2. Subject is limited to 50 characters 3. Subject does not end with a period 4. Subject uses the imperative mood ("add", not "adding") 5. Body wraps at 72 characters 6. 
Body explains "what" and "why", not "how" Closes #2330 from sekikn/AIRFLOW-1235 Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/7e762d42 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/7e762d42 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/7e762d42 Branch: refs/heads/master Commit: 7e762d42df50d84e4740e15c24594c50aaab53a2 Parents: 8c42d03 Author: Kengo Seki Authored: Thu Mar 22 11:50:27 2018 -0700 Committer: Arthur Wiedmer Committed: Thu Mar 22 11:50:27 2018 -0700 -- airflow/bin/cli.py | 129 +- airflow/config_templates/default_airflow.cfg | 3 + airflow/exceptions.py| 4 + tests/core.py| 10 ++ 4 files changed, 91 insertions(+), 55 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/7e762d42/airflow/bin/cli.py -- diff --git a/airflow/bin/cli.py b/airflow/bin/cli.py index 449d8ca..1801cc7 100755 --- a/airflow/bin/cli.py +++ b/airflow/bin/cli.py @@ -46,7 +46,7 @@ import airflow from airflow import api from airflow import jobs, settings from airflow import configuration as conf -from airflow.exceptions import AirflowException +from airflow.exceptions import AirflowException, AirflowWebServerTimeout from airflow.executors import GetDefaultExecutor from airflow.models import (DagModel, DagBag, TaskInstance, DagPickle, DagRun, Variable, DagStat, @@ -592,7 +592,12 @@ def get_num_ready_workers_running(gunicorn_master_proc): return len(ready_workers) -def restart_workers(gunicorn_master_proc, num_workers_expected): +def get_num_workers_running(gunicorn_master_proc): +workers = psutil.Process(gunicorn_master_proc.pid).children() +return len(workers) + + +def restart_workers(gunicorn_master_proc, num_workers_expected, master_timeout): """ Runs forever, monitoring the child processes of @gunicorn_master_proc and restarting workers occasionally. 
@@ -618,17 +623,18 @@ def restart_workers(gunicorn_master_proc, num_workers_expected): gracefully and that the oldest worker is terminated. """ -def wait_until_true(fn): +def wait_until_true(fn, timeout=0): """ Sleeps until fn is true """ +t = time.time() while not fn(): +if 0 < timeout and timeout <= time.time() - t: +raise AirflowWebServerTimeout( +"No response from gunicorn master within {0} seconds" +.format(timeout)) time.sleep(0.1) -def get_num_workers_running(gunicorn_master_proc): -workers = psutil.Process(gunicorn_master_proc.pid).children() -return len(workers) - def start_refresh(gunicorn_master_proc): ba
incubator-airflow git commit: [AIRFLOW-2335] fix issue with jdk8 download for ci
Repository: incubator-airflow Updated Branches: refs/heads/master 3f1bfd38c -> 0f8507ae3 [AIRFLOW-2335] fix issue with jdk8 download for ci Make sure you have checked _all_ steps below. - [x] My PR addresses the following [Airflow JIRA] (https://issues.apache.org/jira/browse/AIRFLOW/) issues and references them in the PR title. For example, "\[AIRFLOW-XXX\] My Airflow PR" - https://issues.apache.org/jira/browse/AIRFLOW-2335 - In case you are fixing a typo in the documentation you can prepend your commit with \[AIRFLOW-XXX\], code changes always need a JIRA issue. - [x] Here are some details about my PR, including screenshots of any UI changes: There is an issue with travis pulling jdk8 that is preventing CI jobs from running. This blocks further development of the project. Reference: https://github.com/travis-ci/travis- ci/issues/9512#issuecomment-382235301 - [x] My PR adds the following unit tests __OR__ does not need testing for this extremely good reason: This PR can't be unit tested since it is just configuration. However, the fact that unit tests run successfully should show that it works. - [ ] My commits all reference JIRA issues in their subject lines, and I have squashed multiple commits if they address the same issue. In addition, my commits follow the guidelines from "[How to write a good git commit message](http://chris.beams.io/posts/git- commit/)": 1. Subject is separated from body by a blank line 2. Subject is limited to 50 characters 3. Subject does not end with a period 4. Subject uses the imperative mood ("add", not "adding") 5. Body wraps at 72 characters 6. Body explains "what" and "why", not "how" - [ ] In case of new functionality, my PR adds documentation that describes how to use it. - When adding new operators/hooks/sensors, the autoclass documentation generation needs to be added. 
- [ ] Passes `git diff upstream/master -u -- "*.py" | flake8 --diff` Closes #3236 from dimberman/AIRFLOW- 2335_travis_issue Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/0f8507ae Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/0f8507ae Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/0f8507ae Branch: refs/heads/master Commit: 0f8507ae351787e086d1d1038f6f0ba52e6d9aaa Parents: 3f1bfd3 Author: Daniel Imberman Authored: Tue Apr 17 21:57:14 2018 -0700 Committer: Arthur Wiedmer Committed: Tue Apr 17 21:57:42 2018 -0700 -- .travis.yml | 17 ++--- 1 file changed, 14 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/0f8507ae/.travis.yml -- diff --git a/.travis.yml b/.travis.yml index d9a333d..883473d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,9 +6,9 @@ # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. 
You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -37,7 +37,6 @@ addons: - krb5-user - krb5-kdc - krb5-admin-server - - oracle-java8-installer - python-selinux postgresql: "9.2" python: @@ -93,7 +92,19 @@ before_install: - cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys - ln -s ~/.ssh/authorized_keys ~/.ssh/authorized_keys2 - chmod 600 ~/.ssh/* + - sudo add-apt-repository -y ppa:webupd8team/java + - sudo apt-get update + - sudo apt-get install -y oracle-java8-installer || true + #todo remove this kludge and the above || true when the ppa is fixed + - cd /var/lib/dpkg/info + - sudo sed -i 's|JAVA_VERSION=8u161|JAVA_VERSION=8u172|' oracle-java8-installer.* + - sudo sed -i 's|PARTNER_URL=http://download.oracle.com/otn-pub/java/jdk/8u161-b12/2f38c3b165be4555a1fa6e98c45e0808/|PARTNER_URL=http://download.oracle.com/otn-pub/java/jdk/8u172-b11/a58eab1ec242421181065cdc37240b08/|' oracle-java8-installer.* + - sudo sed -i 's|SHA256SUM_TGZ="6dbc56a0e3310b69e91bb64db63a485bd7b6a8083f08e48047276380a0e2021e"|SHA256SUM_TGZ="28a00b9400b6913563553e09e8024c286b506d8523334c93ddec6c9ec7e9d346"|' oracle-java8-installer.* + - sudo sed -i 's|J_DIR=jdk1.8.0_161|J_DIR=jdk1.8.0_172|' oracle-java8-installer.* + - sudo apt-get update + - sudo apt-get install -y oracle-java8-installer - jdk_switcher use oraclejdk8 + - cd $TRAVIS_BUILD_DIR install: - pip install --upgrade pip - pip install tox
incubator-airflow git commit: [AIRFLOW-2240][DASK] Added TLS/SSL support for the dask-distributed scheduler.
Repository: incubator-airflow Updated Branches: refs/heads/master 3fa55db90 -> e95a1251b [AIRFLOW-2240][DASK] Added TLS/SSL support for the dask-distributed scheduler. As of 0.17.0 dask distributed has support for TLS/SSL. [dask] Added TLS/SSL support for the dask- distributed scheduler. As of 0.17.0 dask distributed has support for TLS/SSL. Add a test for tls under dask distributed Closes #2683 from mariusvniekerk/dask-ssl Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/e95a1251 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/e95a1251 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/e95a1251 Branch: refs/heads/master Commit: e95a1251b746ac74a47b3ccae52d4abdc26add82 Parents: 3fa55db Author: Marius van Niekerk Authored: Wed Apr 18 09:45:46 2018 -0700 Committer: Arthur Wiedmer Committed: Wed Apr 18 09:45:52 2018 -0700 -- airflow/config_templates/default_airflow.cfg | 4 ++ airflow/executors/dask_executor.py | 16 +- setup.py | 2 +- tests/executors/dask_executor.py | 61 +++ 4 files changed, 72 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/e95a1251/airflow/config_templates/default_airflow.cfg -- diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg index bb3793b..400bcc0 100644 --- a/airflow/config_templates/default_airflow.cfg +++ b/airflow/config_templates/default_airflow.cfg @@ -364,6 +364,10 @@ ssl_cacert = # The IP address and port of the Dask cluster's scheduler. cluster_address = 127.0.0.1:8786 +# TLS/ SSL settings to access a secured Dask scheduler. 
+tls_ca = +tls_cert = +tls_key = [scheduler] http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/e95a1251/airflow/executors/dask_executor.py -- diff --git a/airflow/executors/dask_executor.py b/airflow/executors/dask_executor.py index 0d914ba..17ace55 100644 --- a/airflow/executors/dask_executor.py +++ b/airflow/executors/dask_executor.py @@ -36,10 +36,24 @@ class DaskExecutor(BaseExecutor): raise ValueError( 'Please provide a Dask cluster address in airflow.cfg') self.cluster_address = cluster_address +# ssl / tls parameters +self.tls_ca = configuration.get('dask', 'tls_ca') +self.tls_key = configuration.get('dask', 'tls_key') +self.tls_cert = configuration.get('dask', 'tls_cert') super(DaskExecutor, self).__init__(parallelism=0) def start(self): -self.client = distributed.Client(self.cluster_address) +if (self.tls_ca) or (self.tls_key) or (self.tls_cert): +from distributed.security import Security +security = Security( +tls_client_key=self.tls_key, +tls_client_cert=self.tls_cert, +tls_ca_file=self.tls_ca, +) +else: +security = None + +self.client = distributed.Client(self.cluster_address, security=security) self.futures = {} def execute_async(self, key, command, queue=None): http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/e95a1251/setup.py -- diff --git a/setup.py b/setup.py index 700f3ae..9709ddb 100644 --- a/setup.py +++ b/setup.py @@ -112,7 +112,7 @@ cgroups = [ ] crypto = ['cryptography>=0.9.3'] dask = [ -'distributed>=1.15.2, <2' +'distributed>=1.17.1, <2' ] databricks = ['requests>=2.5.1, <3'] datadog = ['datadog>=0.14.0'] http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/e95a1251/tests/executors/dask_executor.py -- diff --git a/tests/executors/dask_executor.py b/tests/executors/dask_executor.py index 40796bc..7937b1b 100644 --- a/tests/executors/dask_executor.py +++ b/tests/executors/dask_executor.py @@ -29,6 +29,13 @@ from datetime import timedelta try: from airflow.executors.dask_executor import DaskExecutor from 
distributed import LocalCluster +# utility functions imported from the dask testing suite to instantiate a test +# cluster for tls tests +from distributed.utils_test import ( +get_cert, +cluster as dask_testing_cluster, +tls_security, +) SKIP_DASK = False except ImportError: SKIP_DASK = True
incubator-airflow git commit: [AIRFLOW-74] SubdagOperators can consume all celeryd worker processes
Repository: incubator-airflow Updated Branches: refs/heads/master be886b986 -> 64d950166 [AIRFLOW-74] SubdagOperators can consume all celeryd worker processes Closes #3251 from feng-tao/airflow-74 Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/64d95016 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/64d95016 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/64d95016 Branch: refs/heads/master Commit: 64d950166773749c0e4aa0d7032b080cadd56a53 Parents: be886b9 Author: Tao feng Authored: Tue Apr 24 10:13:25 2018 -0700 Committer: Arthur Wiedmer Committed: Tue Apr 24 10:13:25 2018 -0700 -- UPDATING.md | 2 ++ airflow/operators/subdag_operator.py | 22 ++ tests/operators/subdag_operator.py | 19 +-- 3 files changed, 29 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/64d95016/UPDATING.md -- diff --git a/UPDATING.md b/UPDATING.md index 881539f..609c8db 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -5,6 +5,8 @@ assists users migrating to a new version. ## Airflow Master +### Default executor for SubDagOperator is changed to SequentialExecutor + ### New Webserver UI with Role-Based Access Control The current webserver UI uses the Flask-Admin extension. The new webserver UI uses the [Flask-AppBuilder (FAB)](https://github.com/dpgaspar/Flask-AppBuilder) extension. FAB has built-in authentication support and Role-Based Access Control (RBAC), which provides configurable roles and permissions for individual users. 
http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/64d95016/airflow/operators/subdag_operator.py -- diff --git a/airflow/operators/subdag_operator.py b/airflow/operators/subdag_operator.py index c3c7591..052095e 100644 --- a/airflow/operators/subdag_operator.py +++ b/airflow/operators/subdag_operator.py @@ -7,9 +7,9 @@ # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -18,10 +18,10 @@ # under the License. from airflow.exceptions import AirflowException +from airflow.executors.sequential_executor import SequentialExecutor from airflow.models import BaseOperator, Pool from airflow.utils.decorators import apply_defaults from airflow.utils.db import provide_session -from airflow.executors import GetDefaultExecutor class SubDagOperator(BaseOperator): @@ -35,16 +35,19 @@ class SubDagOperator(BaseOperator): def __init__( self, subdag, -executor=GetDefaultExecutor(), +executor=SequentialExecutor(), *args, **kwargs): """ -Yo dawg. This runs a sub dag. By convention, a sub dag's dag_id +This runs a sub dag. By convention, a sub dag's dag_id should be prefixed by its parent and a dot. As in `parent.child`. :param subdag: the DAG object to run as a subdag of the current DAG. -:type subdag: airflow.DAG -:param dag: the parent DAG -:type subdag: airflow.DAG +:type subdag: airflow.DAG. +:param dag: the parent DAG for the subdag. +:type dag: airflow.DAG. +:param executor: the executor for this subdag. Default to use SequentialExecutor. + Please find AIRFLOW-74 for more details. +:type executor: airflow.executors. 
""" import airflow.models dag = kwargs.get('dag') or airflow.models._CONTEXT_MANAGER_DAG @@ -88,6 +91,9 @@ class SubDagOperator(BaseOperator): ) self.subdag = subdag +# Airflow pool is not honored by SubDagOperator. +# Hence resources could be consumed by SubdagOperators +# Use other executor with your own risk. self.executor = executor def execute(self, context): http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/64d95016/tests/operators/subdag_operator.py -- diff --git a/tests/operators/subdag_operator.py b/tests/operators/subdag_operator.py index 5b51f1c..af47c5c 100644 --- a/tests/operators/subdag_operator.py +++ b/tests/operators/subdag_op
incubator-airflow git commit: [AIRFLOW-2380] Add support for environment variables in Spark submit operator.
Repository: incubator-airflow Updated Branches: refs/heads/master b0d0d0a04 -> 36193fc74 [AIRFLOW-2380] Add support for environment variables in Spark submit operator. Closes #3268 from piffall/master Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/36193fc7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/36193fc7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/36193fc7 Branch: refs/heads/master Commit: 36193fc7449ca67c807b54ad17a086b35c0c4471 Parents: b0d0d0a Author: Cristòfol Torrens Authored: Thu Apr 26 14:21:21 2018 -0700 Committer: Arthur Wiedmer Committed: Thu Apr 26 14:21:21 2018 -0700 -- airflow/contrib/hooks/spark_submit_hook.py | 29 +- .../contrib/operators/spark_submit_operator.py | 10 +++- tests/contrib/hooks/test_spark_submit_hook.py | 59 +++- 3 files changed, 91 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/36193fc7/airflow/contrib/hooks/spark_submit_hook.py -- diff --git a/airflow/contrib/hooks/spark_submit_hook.py b/airflow/contrib/hooks/spark_submit_hook.py index 71c68c0..0185cab 100644 --- a/airflow/contrib/hooks/spark_submit_hook.py +++ b/airflow/contrib/hooks/spark_submit_hook.py @@ -7,9 +7,9 @@ # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -80,6 +80,9 @@ class SparkSubmitHook(BaseHook, LoggingMixin): :type num_executors: int :param application_args: Arguments for the application being submitted :type application_args: list +:param env_vars: Environment variables for spark-submit. 
It + supports yarn and k8s mode too. +:type env_vars: dict :param verbose: Whether to pass the verbose flag to spark-submit process for debugging :type verbose: bool """ @@ -103,6 +106,7 @@ class SparkSubmitHook(BaseHook, LoggingMixin): name='default-name', num_executors=None, application_args=None, + env_vars=None, verbose=False): self._conf = conf self._conn_id = conn_id @@ -123,6 +127,7 @@ class SparkSubmitHook(BaseHook, LoggingMixin): self._name = name self._num_executors = num_executors self._application_args = application_args +self._env_vars = env_vars self._verbose = verbose self._submit_sp = None self._yarn_application_id = None @@ -209,6 +214,20 @@ class SparkSubmitHook(BaseHook, LoggingMixin): if self._conf: for key in self._conf: connection_cmd += ["--conf", "{}={}".format(key, str(self._conf[key]))] +if self._env_vars and (self._is_kubernetes or self._is_yarn): +if self._is_yarn: +tmpl = "spark.yarn.appMasterEnv.{}={}" +else: +tmpl = "spark.kubernetes.driverEnv.{}={}" +for key in self._env_vars: +connection_cmd += [ +"--conf", +tmpl.format(key, str(self._env_vars[key]))] +elif self._env_vars and self._connection['deploy_mode'] != "cluster": +self._env = self._env_vars # Do it on Popen of the process +elif self._env_vars and self._connection['deploy_mode'] == "cluster": +raise AirflowException( +"SparkSubmitHook env_vars is not supported in standalone-cluster mode.") if self._is_kubernetes: connection_cmd += ["--conf", "spark.kubernetes.namespace={}".format( self._connection['namespace'])] @@ -294,6 +313,12 @@ class SparkSubmitHook(BaseHook, LoggingMixin): :param kwargs: extra arguments to Popen (see subprocess.Popen) """ spark_submit_cmd = self._build_spark_submit_command(application) + +if hasattr(self, '_env'): +env = os.environ.copy() +env.update(self._env) +kwargs["env"] = env + self._submit_sp = subprocess.Popen(spark_submit_cmd, std
incubator-airflow git commit: [AIRFLOW-2086][AIRFLOW-2393] Customize default dagrun number in tree view
Repository: incubator-airflow Updated Branches: refs/heads/master 2728138f1 -> 2a55ffe0c [AIRFLOW-2086][AIRFLOW-2393] Customize default dagrun number in tree view Closes #3279 from feng-tao/reduce-tree-view This introduces a new configuration variable to set the default number of dag runs displayed in the tree view. For large DAGs, this could cause timeouts in the webserver. Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/2a55ffe0 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/2a55ffe0 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/2a55ffe0 Branch: refs/heads/master Commit: 2a55ffe0cd2c04fde2eecdbeff2fa727067069e1 Parents: 2728138 Author: Tao feng Authored: Wed May 9 08:45:06 2018 -0700 Committer: Arthur Wiedmer Committed: Wed May 9 08:45:17 2018 -0700 -- UPDATING.md | 3 +++ airflow/config_templates/default_airflow.cfg | 3 +++ airflow/www/templates/airflow/dag.html | 2 +- airflow/www/templates/airflow/dags.html | 2 +- airflow/www/templates/airflow/list_dags.html | 2 +- airflow/www/views.py | 19 --- airflow/www_rbac/templates/airflow/dag.html | 2 +- airflow/www_rbac/templates/airflow/dags.html | 2 +- airflow/www_rbac/views.py| 14 +- 9 files changed, 32 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/2a55ffe0/UPDATING.md -- diff --git a/UPDATING.md b/UPDATING.md index defd95b..c9e1395 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -5,6 +5,9 @@ assists users migrating to a new version. ## Airflow Master +### Add a configuration variable(default_dag_run_display_number) to control numbers of dag run for display +Add a configuration variable(default_dag_run_display_number) under webserver section to control num of dag run to show in UI. 
+ ### Default executor for SubDagOperator is changed to SequentialExecutor ### New Webserver UI with Role-Based Access Control http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/2a55ffe0/airflow/config_templates/default_airflow.cfg -- diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg index b91961e..33b99ff 100644 --- a/airflow/config_templates/default_airflow.cfg +++ b/airflow/config_templates/default_airflow.cfg @@ -280,6 +280,9 @@ rbac = False # Define the color of navigation bar navbar_color = #007A87 +# Default dagrun to show in UI +default_dag_run_display_number = 25 + [email] email_backend = airflow.utils.email.send_email_smtp http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/2a55ffe0/airflow/www/templates/airflow/dag.html -- diff --git a/airflow/www/templates/airflow/dag.html b/airflow/www/templates/airflow/dag.html index ed84f27..18242d3 100644 --- a/airflow/www/templates/airflow/dag.html +++ b/airflow/www/templates/airflow/dag.html @@ -57,7 +57,7 @@ Graph View - + Tree View http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/2a55ffe0/airflow/www/templates/airflow/dags.html -- diff --git a/airflow/www/templates/airflow/dags.html b/airflow/www/templates/airflow/dags.html index d22bfb3..2397890 100644 --- a/airflow/www/templates/airflow/dags.html +++ b/airflow/www/templates/airflow/dags.html @@ -145,7 +145,7 @@ - + http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/2a55ffe0/airflow/www/templates/airflow/list_dags.html -- diff --git a/airflow/www/templates/airflow/list_dags.html b/airflow/www/templates/airflow/list_dags.html index e8533d7..c7f2497 100644 --- a/airflow/www/templates/airflow/list_dags.html +++ b/airflow/www/templates/airflow/list_dags.html @@ -147,7 +147,7 @@ - + http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/2a55ffe0/airflow/www/views.py -- diff --git a/airflow/www/views.py b/airflow/www/views.py index 6e2f1fc..c36d55f 100644 --- 
a/airflow/www/views.py +++ b/airflow/www/views.py @@ -7,9 +7,9 @@ # to you under the Apache License, Version 2.0 (the # "
incubator-airflow git commit: [AIRFLOW-2563] Fix PigCliHook Python 3 string/bytes use
Repository: incubator-airflow Updated Branches: refs/heads/master 096ba9ecd -> 3e7e42f02 [AIRFLOW-2563] Fix PigCliHook Python 3 string/bytes use Unit tests added for PigCliHook as well to prevent future issues. Closes #3594 from jakahn/master Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/3e7e42f0 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/3e7e42f0 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/3e7e42f0 Branch: refs/heads/master Commit: 3e7e42f028279a628d9e15d1ae4b6005593f8afb Parents: 096ba9e Author: Jasper Kahn Authored: Fri Jul 27 16:08:32 2018 -0700 Committer: Arthur Wiedmer Committed: Fri Jul 27 16:08:32 2018 -0700 -- airflow/hooks/pig_hook.py| 6 +- tests/hooks/test_pig_hook.py | 137 ++ 2 files changed, 140 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/3e7e42f0/airflow/hooks/pig_hook.py -- diff --git a/airflow/hooks/pig_hook.py b/airflow/hooks/pig_hook.py index a3836b1..fcfcc7f 100644 --- a/airflow/hooks/pig_hook.py +++ b/airflow/hooks/pig_hook.py @@ -55,7 +55,7 @@ class PigCliHook(BaseHook): with TemporaryDirectory(prefix='airflow_pigop_') as tmp_dir: with NamedTemporaryFile(dir=tmp_dir) as f: -f.write(pig) +f.write(pig.encode('utf-8')) f.flush() fname = f.name pig_bin = 'pig' @@ -76,8 +76,8 @@ class PigCliHook(BaseHook): close_fds=True) self.sp = sp stdout = '' -for line in iter(sp.stdout.readline, ''): -stdout += line +for line in iter(sp.stdout.readline, b''): +stdout += line.decode('utf-8') if verbose: self.log.info(line.strip()) sp.wait() http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/3e7e42f0/tests/hooks/test_pig_hook.py -- diff --git a/tests/hooks/test_pig_hook.py b/tests/hooks/test_pig_hook.py new file mode 100644 index 000..c250d23 --- /dev/null +++ b/tests/hooks/test_pig_hook.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +# +# Licensed to the 
Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import unittest +from airflow.hooks.pig_hook import PigCliHook + +try: +from unittest import mock +except ImportError: +try: +import mock +except ImportError: +mock = None + + +class TestPigCliHook(unittest.TestCase): + +def setUp(self): +super(TestPigCliHook, self).setUp() + +self.extra_dejson = mock.MagicMock() +self.extra_dejson.get.return_value = None +self.conn = mock.MagicMock() +self.conn.extra_dejson = self.extra_dejson +conn = self.conn + +class SubPigCliHook(PigCliHook): +def get_connection(self, id): +return conn + +self.pig_hook = SubPigCliHook + +def test_init(self): +self.pig_hook() +self.extra_dejson.get.assert_called_with('pig_properties', '') + +@mock.patch('subprocess.Popen') +def test_run_cli_success(self, popen_mock): +proc_mock = mock.MagicMock() +proc_mock.returncode = 0 +proc_mock.stdout.readline.return_value = b'' +popen_mock.return_value = proc_mock + +hook = self.pig_hook() +stdout = hook.run_cli("") + +self.assertEqual(stdout, "") + +@mock.patch('subprocess.Popen') +def test_run_cli_fail(self, popen_mock): +proc_mock = mock.MagicMock() +proc_mock.returncode = 1 +proc_mock.stdout.readline.return_value = b'' +
[jira] [Created] (AIRFLOW-885) Add Change.org to the list of Airflow users
Arthur Wiedmer created AIRFLOW-885: -- Summary: Add Change.org to the list of Airflow users Key: AIRFLOW-885 URL: https://issues.apache.org/jira/browse/AIRFLOW-885 Project: Apache Airflow Issue Type: Task Reporter: Arthur Wiedmer -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (AIRFLOW-885) Add Change.org to the list of Airflow users
[ https://issues.apache.org/jira/browse/AIRFLOW-885?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer updated AIRFLOW-885: --- Priority: Trivial (was: Major) > Add Change.org to the list of Airflow users > --- > > Key: AIRFLOW-885 > URL: https://issues.apache.org/jira/browse/AIRFLOW-885 > Project: Apache Airflow > Issue Type: Task >Reporter: Arthur Wiedmer >Priority: Trivial > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (AIRFLOW-916) Fix ConfigParser deprecation warning
[ https://issues.apache.org/jira/browse/AIRFLOW-916?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15891058#comment-15891058 ] Arthur Wiedmer commented on AIRFLOW-916: This was breaking things for me on 2.7.13 on a local fresh install. Let's revert. > Fix ConfigParser deprecation warning > - > > Key: AIRFLOW-916 > URL: https://issues.apache.org/jira/browse/AIRFLOW-916 > Project: Apache Airflow > Issue Type: Improvement >Affects Versions: 1.8.0 >Reporter: Jeremiah Lowin >Assignee: Jeremiah Lowin >Priority: Trivial > Fix For: 1.9.0 > > > ConfigParser.readfp() is deprecated in favor of ConfigParser.read_file(), > according to warning messages -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (AIRFLOW-846) Release schedule, latest tag is too old
[ https://issues.apache.org/jira/browse/AIRFLOW-846?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15894536#comment-15894536 ] Arthur Wiedmer commented on AIRFLOW-846: Hi [~ultrabug], We are on RC5 now, and will release to PyPI once the current blockers are cleared, and a new vote on the release is taken. All of this combined might take another couple of weeks. Best, Arthur > Release schedule, latest tag is too old > --- > > Key: AIRFLOW-846 > URL: https://issues.apache.org/jira/browse/AIRFLOW-846 > Project: Apache Airflow > Issue Type: Task >Reporter: Ultrabug >Priority: Blocker > Labels: release, tagging > > To my understanding, there is no clear point about the release schedule of > the project. > The latest tag is 1.7.1.3 from June 2016, which is not well suited for > production nowadays. > For example, the latest available release is still affected by AIRFLOW-178 > which means that we have to patch the sources on production to work with ZIP > files. > Could you please share your thoughts and position on the release planning of > the project ? > Would it be possible to get a newer tag sometime soon ? > Thank you -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Created] (AIRFLOW-947) Make PrestoHook surface better messages when the Presto Cluster is unavailable.
Arthur Wiedmer created AIRFLOW-947: -- Summary: Make PrestoHook surface better messages when the Presto Cluster is unavailable. Key: AIRFLOW-947 URL: https://issues.apache.org/jira/browse/AIRFLOW-947 Project: Apache Airflow Issue Type: Bug Reporter: Arthur Wiedmer Assignee: Arthur Wiedmer Priority: Minor -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (AIRFLOW-959) .gitignore file is disorganized and incomplete
[ https://issues.apache.org/jira/browse/AIRFLOW-959?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15903695#comment-15903695 ] Arthur Wiedmer commented on AIRFLOW-959: +1 > .gitignore file is disorganized and incomplete > -- > > Key: AIRFLOW-959 > URL: https://issues.apache.org/jira/browse/AIRFLOW-959 > Project: Apache Airflow > Issue Type: Bug >Reporter: George Leslie-Waksman >Assignee: George Leslie-Waksman >Priority: Trivial > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Resolved] (AIRFLOW-959) .gitignore file is disorganized and incomplete
[ https://issues.apache.org/jira/browse/AIRFLOW-959?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-959. Resolution: Fixed Fix Version/s: 1.9.0 Issue resolved by pull request #2136 [https://github.com/apache/incubator-airflow/pull/2136] > .gitignore file is disorganized and incomplete > -- > > Key: AIRFLOW-959 > URL: https://issues.apache.org/jira/browse/AIRFLOW-959 > Project: Apache Airflow > Issue Type: Bug >Reporter: George Leslie-Waksman >Assignee: George Leslie-Waksman >Priority: Trivial > Fix For: 1.9.0 > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Resolved] (AIRFLOW-960) Add support for .editorconfig
[ https://issues.apache.org/jira/browse/AIRFLOW-960?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-960. Resolution: Fixed Fix Version/s: 1.9.0 Issue resolved by pull request #2137 [https://github.com/apache/incubator-airflow/pull/2137] > Add support for .editorconfig > - > > Key: AIRFLOW-960 > URL: https://issues.apache.org/jira/browse/AIRFLOW-960 > Project: Apache Airflow > Issue Type: Improvement >Reporter: George Leslie-Waksman >Assignee: George Leslie-Waksman >Priority: Trivial > Fix For: 1.9.0 > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Created] (AIRFLOW-997) Change setup.cfg to point to Apache instead of Max
Arthur Wiedmer created AIRFLOW-997: -- Summary: Change setup.cfg to point to Apache instead of Max Key: AIRFLOW-997 URL: https://issues.apache.org/jira/browse/AIRFLOW-997 Project: Apache Airflow Issue Type: Improvement Reporter: Arthur Wiedmer Assignee: Arthur Wiedmer Priority: Minor -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Resolved] (AIRFLOW-997) Change setup.cfg to point to Apache instead of Max
[ https://issues.apache.org/jira/browse/AIRFLOW-997?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-997. Resolution: Fixed Fix Version/s: 1.9.0 Issue resolved by pull request #2162 [https://github.com/apache/incubator-airflow/pull/2162] > Change setup.cfg to point to Apache instead of Max > -- > > Key: AIRFLOW-997 > URL: https://issues.apache.org/jira/browse/AIRFLOW-997 > Project: Apache Airflow > Issue Type: Improvement >Reporter: Arthur Wiedmer > Assignee: Arthur Wiedmer >Priority: Minor > Fix For: 1.9.0 > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Resolved] (AIRFLOW-999) Support for Redis database
[ https://issues.apache.org/jira/browse/AIRFLOW-999?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-999. Resolution: Fixed Fix Version/s: 1.9.0 Issue resolved by pull request #2165 [https://github.com/apache/incubator-airflow/pull/2165] > Support for Redis database > -- > > Key: AIRFLOW-999 > URL: https://issues.apache.org/jira/browse/AIRFLOW-999 > Project: Apache Airflow > Issue Type: Improvement > Components: db >Reporter: msempere >Assignee: msempere >Priority: Minor > Labels: features > Fix For: 1.9.0 > > > Currently Airflow doesn't offer support for Redis DB. > The idea is to create a Hook to connect to it and offer a minimal > functionality. > So the proposal is to create a sensor that monitor for a Redis key existence. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Resolved] (AIRFLOW-1007) Jinja sandbox is vulnerable to RCE
[ https://issues.apache.org/jira/browse/AIRFLOW-1007?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-1007. - Resolution: Fixed Fix Version/s: 1.9.0 Issue resolved by pull request #2184 [https://github.com/apache/incubator-airflow/pull/2184] > Jinja sandbox is vulnerable to RCE > -- > > Key: AIRFLOW-1007 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1007 > Project: Apache Airflow > Issue Type: Bug >Reporter: Alex Guziel >Assignee: Alex Guziel > Fix For: 1.9.0 > > > Right now, the jinja template functionality in chart_data takes arbitrary > strings and executes them. We should use the sandbox functionality to prevent > this. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Resolved] (AIRFLOW-1038) Specify celery serializers explicitly and pin version
[ https://issues.apache.org/jira/browse/AIRFLOW-1038?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-1038. - Resolution: Fixed Fix Version/s: 1.9.0 Issue resolved by pull request #2185 [https://github.com/apache/incubator-airflow/pull/2185] > Specify celery serializers explicitly and pin version > - > > Key: AIRFLOW-1038 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1038 > Project: Apache Airflow > Issue Type: Bug >Reporter: Alex Guziel >Assignee: Alex Guziel > Fix For: 1.9.0 > > > Celery 3->4 upgrade changes the default task and result serializer from > pickle to json. Pickle is faster and supports more types > http://docs.celeryproject.org/en/latest/userguide/calling.html > This also causes issues when different versions of celery are running on > different hosts. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Created] (AIRFLOW-1066) Replace instances of airf...@airflow.com with airf...@example.com
Arthur Wiedmer created AIRFLOW-1066: --- Summary: Replace instances of airf...@airflow.com with airf...@example.com Key: AIRFLOW-1066 URL: https://issues.apache.org/jira/browse/AIRFLOW-1066 Project: Apache Airflow Issue Type: Bug Reporter: Arthur Wiedmer Assignee: Arthur Wiedmer Priority: Trivial airflow.com is a registered website to a company selling fans :) We can use example.com as a domain name. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (AIRFLOW-1067) Should not use airf...@airflow.com in examples
[ https://issues.apache.org/jira/browse/AIRFLOW-1067?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15955329#comment-15955329 ] Arthur Wiedmer commented on AIRFLOW-1067: - Duplicate of https://issues.apache.org/jira/browse/AIRFLOW-1066 We had the same idea. > Should not use airf...@airflow.com in examples > -- > > Key: AIRFLOW-1067 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1067 > Project: Apache Airflow > Issue Type: Bug >Reporter: Xiangrui Meng >Assignee: Xiangrui Meng >Priority: Minor > > airflow.com is owned by a company named Airflow (selling fans, etc). We > should use airf...@example.com in all examples. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Resolved] (AIRFLOW-947) Make PrestoHook surface better messages when the Presto Cluster is unavailable.
[ https://issues.apache.org/jira/browse/AIRFLOW-947?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-947. Resolution: Fixed Fix Version/s: 1.9.0 Issue resolved by pull request #2128 [https://github.com/apache/incubator-airflow/pull/2128] > Make PrestoHook surface better messages when the Presto Cluster is > unavailable. > --- > > Key: AIRFLOW-947 > URL: https://issues.apache.org/jira/browse/AIRFLOW-947 > Project: Apache Airflow > Issue Type: Bug > Reporter: Arthur Wiedmer >Assignee: Arthur Wiedmer >Priority: Minor > Fix For: 1.9.0 > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Resolved] (AIRFLOW-1016) Allow HTTP HEAD request method on HTTPSensor
[ https://issues.apache.org/jira/browse/AIRFLOW-1016?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-1016. - Resolution: Fixed Fix Version/s: 1.9.0 Issue resolved by pull request #2175 [https://github.com/apache/incubator-airflow/pull/2175] > Allow HTTP HEAD request method on HTTPSensor > > > Key: AIRFLOW-1016 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1016 > Project: Apache Airflow > Issue Type: Improvement >Reporter: msempere >Assignee: msempere >Priority: Minor > Labels: features > Fix For: 1.9.0 > > > HTTPSensor hardcodes the HTTP request method to `GET`, and could be the case > where `HEAD` method is needed to act as a sensor. > This case is useful when we just need to retrieve some meta data and not the > complete body for that particular request, and that metadata information is > enough for our sensor. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Resolved] (AIRFLOW-1028) Databricks Operator for Airflow
[ https://issues.apache.org/jira/browse/AIRFLOW-1028?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-1028. - Resolution: Fixed Fix Version/s: 1.9.0 Issue resolved by pull request #2202 [https://github.com/apache/incubator-airflow/pull/2202] > Databricks Operator for Airflow > --- > > Key: AIRFLOW-1028 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1028 > Project: Apache Airflow > Issue Type: New Feature >Reporter: Andrew Chen >Assignee: Andrew Chen > Fix For: 1.9.0 > > > It would be nice to have a Databricks Operator/Hook in Airflow so users of > Databricks can more easily integrate with Airflow. > The operator would submit a spark job to our new /jobs/runs/submit endpoint. > This endpoint is similar to > https://docs.databricks.com/api/latest/jobs.html#jobscreatejob but does not > include the email_notifications, max_retries, min_retry_interval_millis, > retry_on_timeout, schedule, max_concurrent_runs fields. (The submit docs are > not out because it's still a private endpoint.) > Our proposed design for the operator then is to match this REST API endpoint. > Each argument to the parameter is named to be one of the fields of the REST > API request and the value of the argument will match the type expected by the > REST API. We will also merge extra keys from kwargs which should not be > passed to the BaseOperator into our API call in order to be flexible to > updates. > In the case that this interface is not very user friendly, we can later add > more operators which extend this operator. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Created] (AIRFLOW-1154) Always getting "Please import from '.[operator_module]' instead" deprecation warning
Arthur Zubarev created AIRFLOW-1154: --- Summary: Always getting "Please import from '.[operator_module]' instead" deprecation warning Key: AIRFLOW-1154 URL: https://issues.apache.org/jira/browse/AIRFLOW-1154 Project: Apache Airflow Issue Type: Bug Reporter: Arthur Zubarev Priority: Minor On Ubuntu 16 LTS No matter how I import the module I am getting messages like /home/.../.local/lib/python2.7/site-packages/airflow/utils/helpers.py:406: DeprecationWarning: Importing PythonOperator directly from has been deprecated. Please import from '.[operator_module]' instead. Support for direct imports will be dropped entirely in Airflow 2.0. DeprecationWarning). The above is happening even when imported as 'from airflow.operators.python_operator import PythonOperator' -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (AIRFLOW-1165) airflow webservice crashes on ubuntu16 - python3
[ https://issues.apache.org/jira/browse/AIRFLOW-1165?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15993681#comment-15993681 ] Arthur Wiedmer commented on AIRFLOW-1165: - This is a duplicate of https://issues.apache.org/jira/browse/AIRFLOW-832 It is fixed in the current master, and will be fixed in the next release. The short term fix is the commands outlined here: http://stackoverflow.com/a/40857607 > airflow webservice crashes on ubuntu16 - python3 > - > > Key: AIRFLOW-1165 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1165 > Project: Apache Airflow > Issue Type: Bug >Reporter: Hamed > > I am trying to run airflow webserver on ubuntu16, python3 and ran to this > issue. Any idea? > {code} > [2017-05-02 16:36:34,789] [24096] {_internal.py:87} WARNING - * Debugger is > active! > [2017-05-02 16:36:34,790] [24096] {_internal.py:87} INFO - * Debugger PIN: > 294-518-137 > Exception in thread Thread-1: > Traceback (most recent call last): > File "/usr/lib/python3.5/threading.py", line 914, in _bootstrap_inner > self.run() > File "/usr/lib/python3.5/threading.py", line 862, in run > self._target(*self._args, **self._kwargs) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 696, in inner > fd=fd) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 590, in make_server > passthrough_errors, ssl_context, fd=fd) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 525, in __init__ > self.socket = ssl_context.wrap_socket(sock, server_side=True) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 447, in wrap_socket > ssl_version=self._protocol, **kwargs) > File "/usr/lib/python3.5/ssl.py", line 1069, in wrap_socket > ciphers=ciphers) > File "/usr/lib/python3.5/ssl.py", line 680, in __init__ > raise ValueError("certfile must be specified for server-side " > ValueError: certfile must be specified for server-side operations > {code} -- 
This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Assigned] (AIRFLOW-1165) airflow webservice crashes on ubuntu16 - python3
[ https://issues.apache.org/jira/browse/AIRFLOW-1165?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer reassigned AIRFLOW-1165: --- Assignee: Arthur Wiedmer > airflow webservice crashes on ubuntu16 - python3 > - > > Key: AIRFLOW-1165 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1165 > Project: Apache Airflow > Issue Type: Bug >Reporter: Hamed > Assignee: Arthur Wiedmer > Fix For: 1.8.1 > > > I am trying to run airflow webserver on ubuntu16, python3 and ran to this > issue. Any idea? > {code} > [2017-05-02 16:36:34,789] [24096] {_internal.py:87} WARNING - * Debugger is > active! > [2017-05-02 16:36:34,790] [24096] {_internal.py:87} INFO - * Debugger PIN: > 294-518-137 > Exception in thread Thread-1: > Traceback (most recent call last): > File "/usr/lib/python3.5/threading.py", line 914, in _bootstrap_inner > self.run() > File "/usr/lib/python3.5/threading.py", line 862, in run > self._target(*self._args, **self._kwargs) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 696, in inner > fd=fd) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 590, in make_server > passthrough_errors, ssl_context, fd=fd) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 525, in __init__ > self.socket = ssl_context.wrap_socket(sock, server_side=True) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 447, in wrap_socket > ssl_version=self._protocol, **kwargs) > File "/usr/lib/python3.5/ssl.py", line 1069, in wrap_socket > ciphers=ciphers) > File "/usr/lib/python3.5/ssl.py", line 680, in __init__ > raise ValueError("certfile must be specified for server-side " > ValueError: certfile must be specified for server-side operations > {code} -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Resolved] (AIRFLOW-1165) airflow webservice crashes on ubuntu16 - python3
[ https://issues.apache.org/jira/browse/AIRFLOW-1165?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-1165. - Resolution: Fixed Fix Version/s: 1.8.1 Resolved in master and the fix is in the current RC being voted on. > airflow webservice crashes on ubuntu16 - python3 > - > > Key: AIRFLOW-1165 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1165 > Project: Apache Airflow > Issue Type: Bug >Reporter: Hamed > Fix For: 1.8.1 > > > I am trying to run airflow webserver on ubuntu16, python3 and ran to this > issue. Any idea? > {code} > [2017-05-02 16:36:34,789] [24096] {_internal.py:87} WARNING - * Debugger is > active! > [2017-05-02 16:36:34,790] [24096] {_internal.py:87} INFO - * Debugger PIN: > 294-518-137 > Exception in thread Thread-1: > Traceback (most recent call last): > File "/usr/lib/python3.5/threading.py", line 914, in _bootstrap_inner > self.run() > File "/usr/lib/python3.5/threading.py", line 862, in run > self._target(*self._args, **self._kwargs) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 696, in inner > fd=fd) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 590, in make_server > passthrough_errors, ssl_context, fd=fd) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 525, in __init__ > self.socket = ssl_context.wrap_socket(sock, server_side=True) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 447, in wrap_socket > ssl_version=self._protocol, **kwargs) > File "/usr/lib/python3.5/ssl.py", line 1069, in wrap_socket > ciphers=ciphers) > File "/usr/lib/python3.5/ssl.py", line 680, in __init__ > raise ValueError("certfile must be specified for server-side " > ValueError: certfile must be specified for server-side operations > {code} -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Created] (AIRFLOW-1196) TriggerDagRunOperator should allow templated trigger_dag_id
Arthur Vigil created AIRFLOW-1196: - Summary: TriggerDagRunOperator should allow templated trigger_dag_id Key: AIRFLOW-1196 URL: https://issues.apache.org/jira/browse/AIRFLOW-1196 Project: Apache Airflow Issue Type: Improvement Components: operators Affects Versions: Airflow 1.8 Reporter: Arthur Vigil Assignee: Arthur Vigil Priority: Trivial TriggerDagRunOperator currently has no templated fields. Adding `trigger_dag_id` as a templated field should be a trivial change that improves its flexibility and usefulness. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (AIRFLOW-1196) Make trigger_dag_id a templated field of TriggerDagRunOperator
[ https://issues.apache.org/jira/browse/AIRFLOW-1196?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Vigil updated AIRFLOW-1196: -- Summary: Make trigger_dag_id a templated field of TriggerDagRunOperator (was: TriggerDagRunOperator should allow templated trigger_dag_id) > Make trigger_dag_id a templated field of TriggerDagRunOperator > -- > > Key: AIRFLOW-1196 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1196 > Project: Apache Airflow > Issue Type: Improvement > Components: operators >Affects Versions: Airflow 1.8 >Reporter: Arthur Vigil >Assignee: Arthur Vigil >Priority: Trivial > Labels: easyfix, improvement > > TriggerDagRunOperator currently has no templated fields. Adding > `trigger_dag_id` as a templated field should be a trivial change that > improves its flexibility and usefulness. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (AIRFLOW-1165) airflow webservice crashes on ubuntu16 - python3
[ https://issues.apache.org/jira/browse/AIRFLOW-1165?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16080698#comment-16080698 ] Arthur Wiedmer commented on AIRFLOW-1165: - A short fix until the version is upgraded can be the following At the prompt # Generating an RSA public/private-key pair openssl genrsa -out private.pem 2048 # Generating a self-signed certificate openssl req -new -x509 -key private.pem -out cacert.pem -days 1095 # In your airflow.cfg under [webserver] web_server_ssl_cert = path/to/cacert.pem web_server_ssl_key = path/to/private.pem > airflow webservice crashes on ubuntu16 - python3 > - > > Key: AIRFLOW-1165 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1165 > Project: Apache Airflow > Issue Type: Bug >Reporter: Hamed >Assignee: Arthur Wiedmer > Fix For: 1.9.0 > > > I am trying to run airflow webserver on ubuntu16, python3 and ran to this > issue. Any idea? > {code} > [2017-05-02 16:36:34,789] [24096] {_internal.py:87} WARNING - * Debugger is > active! 
> [2017-05-02 16:36:34,790] [24096] {_internal.py:87} INFO - * Debugger PIN: > 294-518-137 > Exception in thread Thread-1: > Traceback (most recent call last): > File "/usr/lib/python3.5/threading.py", line 914, in _bootstrap_inner > self.run() > File "/usr/lib/python3.5/threading.py", line 862, in run > self._target(*self._args, **self._kwargs) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 696, in inner > fd=fd) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 590, in make_server > passthrough_errors, ssl_context, fd=fd) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 525, in __init__ > self.socket = ssl_context.wrap_socket(sock, server_side=True) > File "/usr/local/lib/python3.5/dist-packages/werkzeug/serving.py", line > 447, in wrap_socket > ssl_version=self._protocol, **kwargs) > File "/usr/lib/python3.5/ssl.py", line 1069, in wrap_socket > ciphers=ciphers) > File "/usr/lib/python3.5/ssl.py", line 680, in __init__ > raise ValueError("certfile must be specified for server-side " > ValueError: certfile must be specified for server-side operations > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029)
[jira] [Created] (AIRFLOW-109) PrestoHook get_pandas_df executes a method that can raise outside of the try catch statement.
Arthur Wiedmer created AIRFLOW-109: -- Summary: PrestoHook get_pandas_df executes a method that can raise outside of the try catch statement. Key: AIRFLOW-109 URL: https://issues.apache.org/jira/browse/AIRFLOW-109 Project: Apache Airflow Issue Type: Bug Components: hooks Affects Versions: Airflow 1.8, Airflow 1.7.1, Airflow 1.6.2 Reporter: Arthur Wiedmer Assignee: Arthur Wiedmer Priority: Minor This issue occurs when a malformed SQL statement is passed to the get_pandas_df method of the presto hook. Pyhive raises a DatabaseError outside of the try catch, leading in the wrong kind of error being raised. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Created] (AIRFLOW-110) Point people to the appropriate process to submit PRs in the repository's CONTRIBUTING.md
Arthur Wiedmer created AIRFLOW-110: -- Summary: Point people to the appropriate process to submit PRs in the repository's CONTRIBUTING.md Key: AIRFLOW-110 URL: https://issues.apache.org/jira/browse/AIRFLOW-110 Project: Apache Airflow Issue Type: Task Components: docs Reporter: Arthur Wiedmer Priority: Trivial The current process to contribute code could be made more accessible. I am assuming that the entry point to the project is Github and the repository. We could modify the contributing.md as well as the README to point to the proper way to do this. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Created] (AIRFLOW-115) Migrate and Refactor AWS integration to use boto3 and better structured hooks
Arthur Wiedmer created AIRFLOW-115: -- Summary: Migrate and Refactor AWS integration to use boto3 and better structured hooks Key: AIRFLOW-115 URL: https://issues.apache.org/jira/browse/AIRFLOW-115 Project: Apache Airflow Issue Type: Improvement Components: AWS, boto3, hooks Reporter: Arthur Wiedmer Priority: Minor h2. Current State The current AWS integration is mostly done through the S3Hook, which uses non standard credentials parsing on top of using boto instead of boto3 which is the current supported AWS sdk for Python. h2. Proposal an AWSHook should be provided that maps Airflow connections to the boto3 API. Operators working with s3, as well as other AWS services would then inherit from this hook but extend the functionality with service specific methods like get_key for S3, start_cluster for EMR, enqueue for SQS, send_email for SES etc... AWSHook |_S3Hook |_EMRHook |_SQSHook |_SESHook ... -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (AIRFLOW-115) Migrate and Refactor AWS integration to use boto3 and better structured hooks
[ https://issues.apache.org/jira/browse/AIRFLOW-115?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer updated AIRFLOW-115: --- Description: h2. Current State The current AWS integration is mostly done through the S3Hook, which uses non standard credentials parsing on top of using boto instead of boto3 which is the current supported AWS sdk for Python. h2. Proposal an AWSHook should be provided that maps Airflow connections to the boto3 API. Operators working with s3, as well as other AWS services would then inherit from this hook but extend the functionality with service specific methods like get_key for S3, start_cluster for EMR, enqueue for SQS, send_email for SES etc... * AWSHook ** S3Hook ** EMRHook ** SQSHook ** SESHook ... was: h2. Current State The current AWS integration is mostly done through the S3Hook, which uses non standard credentials parsing on top of using boto instead of boto3 which is the current supported AWS sdk for Python. h2. Proposal an AWSHook should be provided that maps Airflow connections to the boto3 API. Operators working with s3, as well as other AWS services would then inherit from this hook but extend the functionality with service specific methods like get_key for S3, start_cluster for EMR, enqueue for SQS, send_email for SES etc... * AWSHook **S3Hook **EMRHook **SQSHook **SESHook ... > Migrate and Refactor AWS integration to use boto3 and better structured hooks > - > > Key: AIRFLOW-115 > URL: https://issues.apache.org/jira/browse/AIRFLOW-115 > Project: Apache Airflow > Issue Type: Improvement > Components: AWS, boto3, hooks > Reporter: Arthur Wiedmer >Priority: Minor > > h2. Current State > The current AWS integration is mostly done through the S3Hook, which uses non > standard credentials parsing on top of using boto instead of boto3 which is > the current supported AWS sdk for Python. > h2. Proposal > an AWSHook should be provided that maps Airflow connections to the boto3 API. 
> Operators working with s3, as well as other AWS services would then inherit > from this hook but extend the functionality with service specific methods > like get_key for S3, start_cluster for EMR, enqueue for SQS, send_email for > SES etc... > * AWSHook > ** S3Hook > ** EMRHook > ** SQSHook > ** SESHook > ... > -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (AIRFLOW-115) Migrate and Refactor AWS integration to use boto3 and better structured hooks
[ https://issues.apache.org/jira/browse/AIRFLOW-115?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer updated AIRFLOW-115: --- Description: h2. Current State The current AWS integration is mostly done through the S3Hook, which uses non standard credentials parsing on top of using boto instead of boto3 which is the current supported AWS sdk for Python. h2. Proposal an AWSHook should be provided that maps Airflow connections to the boto3 API. Operators working with s3, as well as other AWS services would then inherit from this hook but extend the functionality with service specific methods like get_key for S3, start_cluster for EMR, enqueue for SQS, send_email for SES etc... * AWSHook **S3Hook **EMRHook **SQSHook **SESHook ... was: h2. Current State The current AWS integration is mostly done through the S3Hook, which uses non standard credentials parsing on top of using boto instead of boto3 which is the current supported AWS sdk for Python. h2. Proposal an AWSHook should be provided that maps Airflow connections to the boto3 API. Operators working with s3, as well as other AWS services would then inherit from this hook but extend the functionality with service specific methods like get_key for S3, start_cluster for EMR, enqueue for SQS, send_email for SES etc... AWSHook |_S3Hook |_EMRHook |_SQSHook |_SESHook ... > Migrate and Refactor AWS integration to use boto3 and better structured hooks > - > > Key: AIRFLOW-115 > URL: https://issues.apache.org/jira/browse/AIRFLOW-115 > Project: Apache Airflow > Issue Type: Improvement > Components: AWS, boto3, hooks > Reporter: Arthur Wiedmer >Priority: Minor > > h2. Current State > The current AWS integration is mostly done through the S3Hook, which uses non > standard credentials parsing on top of using boto instead of boto3 which is > the current supported AWS sdk for Python. > h2. Proposal > an AWSHook should be provided that maps Airflow connections to the boto3 API. 
> Operators working with s3, as well as other AWS services would then inherit > from this hook but extend the functionality with service specific methods > like get_key for S3, start_cluster for EMR, enqueue for SQS, send_email for > SES etc... > * AWSHook > **S3Hook > **EMRHook > **SQSHook > **SESHook > ... > -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Assigned] (AIRFLOW-115) Migrate and Refactor AWS integration to use boto3 and better structured hooks
[ https://issues.apache.org/jira/browse/AIRFLOW-115?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer reassigned AIRFLOW-115: -- Assignee: Arthur Wiedmer > Migrate and Refactor AWS integration to use boto3 and better structured hooks > - > > Key: AIRFLOW-115 > URL: https://issues.apache.org/jira/browse/AIRFLOW-115 > Project: Apache Airflow > Issue Type: Improvement > Components: AWS, boto3, hooks > Reporter: Arthur Wiedmer >Assignee: Arthur Wiedmer >Priority: Minor > > h2. Current State > The current AWS integration is mostly done through the S3Hook, which uses non > standard credentials parsing on top of using boto instead of boto3 which is > the current supported AWS sdk for Python. > h2. Proposal > an AWSHook should be provided that maps Airflow connections to the boto3 API. > Operators working with s3, as well as other AWS services would then inherit > from this hook but extend the functionality with service specific methods > like get_key for S3, start_cluster for EMR, enqueue for SQS, send_email for > SES etc... > * AWSHook > ** S3Hook > ** EMRHook > ** SQSHook > ** SESHook > ... > -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Created] (AIRFLOW-180) Sensors do not test timeout correctly if timeout is more than a day
Arthur Wiedmer created AIRFLOW-180: -- Summary: Sensors do not test timeout correctly if timeout is more than a day Key: AIRFLOW-180 URL: https://issues.apache.org/jira/browse/AIRFLOW-180 Project: Apache Airflow Issue Type: Bug Affects Versions: Airflow 1.7.1, Airflow 1.7.0, Airflow 1.6.2, Airflow 1.7.1.2 Reporter: Arthur Wiedmer Assignee: Arthur Wiedmer Fix For: Airflow 1.8 Currently the sensor tests the timedelta seconds, instead of the timedelta total_seconds -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Created] (AIRFLOW-186) conn.literal is specific to MySQLdb, and should be factored out of the dbapi_hook
Arthur Wiedmer created AIRFLOW-186: -- Summary: conn.literal is specific to MySQLdb, and should be factored out of the dbapi_hook Key: AIRFLOW-186 URL: https://issues.apache.org/jira/browse/AIRFLOW-186 Project: Apache Airflow Issue Type: Bug Reporter: Arthur Wiedmer Assignee: Arthur Wiedmer -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (AIRFLOW-186) conn.literal is specific to MySQLdb, and should be factored out of the dbapi_hook
[ https://issues.apache.org/jira/browse/AIRFLOW-186?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15304496#comment-15304496 ] Arthur Wiedmer commented on AIRFLOW-186: [~john.bod...@gmail.com] FYI > conn.literal is specific to MySQLdb, and should be factored out of the > dbapi_hook > - > > Key: AIRFLOW-186 > URL: https://issues.apache.org/jira/browse/AIRFLOW-186 > Project: Apache Airflow > Issue Type: Bug >Reporter: Arthur Wiedmer >Assignee: Arthur Wiedmer > Original Estimate: 4h > Remaining Estimate: 4h > -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Resolved] (AIRFLOW-186) conn.literal is specific to MySQLdb, and should be factored out of the dbapi_hook
[ https://issues.apache.org/jira/browse/AIRFLOW-186?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-186. Resolution: Fixed > conn.literal is specific to MySQLdb, and should be factored out of the > dbapi_hook > - > > Key: AIRFLOW-186 > URL: https://issues.apache.org/jira/browse/AIRFLOW-186 > Project: Apache Airflow > Issue Type: Bug > Reporter: Arthur Wiedmer >Assignee: Arthur Wiedmer > Original Estimate: 4h > Remaining Estimate: 4h > -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (AIRFLOW-184) Add clear/mark success to CLI
[ https://issues.apache.org/jira/browse/AIRFLOW-184?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15317196#comment-15317196 ] Arthur Wiedmer commented on AIRFLOW-184: Sounds good to me. Ideally, this should need to be queued indeed. Should the commands mark_success just be a wrapper around a more general set_state? Marking large swath of tasks as success is a pain in the ui, and the backfill with regex matching was useful for this. But I agree that it does not make sense anymore and should be refactored into something more useful + that does not go through the scheduler as it is a waste of slots. > Add clear/mark success to CLI > - > > Key: AIRFLOW-184 > URL: https://issues.apache.org/jira/browse/AIRFLOW-184 > Project: Apache Airflow > Issue Type: Bug > Components: cli >Reporter: Chris Riccomini >Assignee: Joy Gao > > AIRFLOW-177 pointed out that the current CLI does not allow us to clear or > mark success a task (including upstream, downstream, past, future, and > recursive) the way that the UI widget does. Given a goal of keeping parity > between the UI and CLI, it seems like we should support this. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Created] (AIRFLOW-263) Backtick file introduced by Highcharts refactor
Arthur Wiedmer created AIRFLOW-263: -- Summary: Backtick file introduced by Highcharts refactor Key: AIRFLOW-263 URL: https://issues.apache.org/jira/browse/AIRFLOW-263 Project: Apache Airflow Issue Type: Bug Reporter: Arthur Wiedmer Assignee: Arthur Wiedmer Priority: Minor A file named "`" was introduced during the Highcharts removal. See https://github.com/apache/incubator-airflow/commit/0a460081bc7cba2d05434148f092b87d35aa8cd3 My best assessment, is that this was a temporary file created by mistake. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (AIRFLOW-263) Backtick file introduced by Highcharts refactor
[ https://issues.apache.org/jira/browse/AIRFLOW-263?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15340552#comment-15340552 ] Arthur Wiedmer commented on AIRFLOW-263: [~bolke], you are the best to assess if this file is needed, but it looks like a temp file to me. > Backtick file introduced by Highcharts refactor > --- > > Key: AIRFLOW-263 > URL: https://issues.apache.org/jira/browse/AIRFLOW-263 > Project: Apache Airflow > Issue Type: Bug >Reporter: Arthur Wiedmer >Assignee: Arthur Wiedmer >Priority: Minor > > A file named "`" was introduced during the Highcharts removal. See > https://github.com/apache/incubator-airflow/commit/0a460081bc7cba2d05434148f092b87d35aa8cd3 > My best assessment, is that this was a temporary file created by mistake. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (AIRFLOW-263) Backtick file introduced by Highcharts refactor
[ https://issues.apache.org/jira/browse/AIRFLOW-263?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15340606#comment-15340606 ] Arthur Wiedmer commented on AIRFLOW-263: PR here : https://github.com/apache/incubator-airflow/pull/1613 > Backtick file introduced by Highcharts refactor > --- > > Key: AIRFLOW-263 > URL: https://issues.apache.org/jira/browse/AIRFLOW-263 > Project: Apache Airflow > Issue Type: Bug >Reporter: Arthur Wiedmer >Assignee: Arthur Wiedmer >Priority: Minor > > A file named "`" was introduced during the Highcharts removal. See > https://github.com/apache/incubator-airflow/commit/0a460081bc7cba2d05434148f092b87d35aa8cd3 > My best assessment, is that this was a temporary file created by mistake. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Created] (AIRFLOW-264) Adding support for Hive queues.
Arthur Wiedmer created AIRFLOW-264: -- Summary: Adding support for Hive queues. Key: AIRFLOW-264 URL: https://issues.apache.org/jira/browse/AIRFLOW-264 Project: Apache Airflow Issue Type: Improvement Components: hive_hooks Reporter: Arthur Wiedmer Assignee: Arthur Wiedmer Priority: Minor Hive allows for queues to be set for workload management. We have started using them for multi-tenant management on our Hive cluster. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Resolved] (AIRFLOW-263) Backtick file introduced by Highcharts refactor
[ https://issues.apache.org/jira/browse/AIRFLOW-263?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-263. Resolution: Fixed > Backtick file introduced by Highcharts refactor > --- > > Key: AIRFLOW-263 > URL: https://issues.apache.org/jira/browse/AIRFLOW-263 > Project: Apache Airflow > Issue Type: Bug >Reporter: Arthur Wiedmer > Assignee: Arthur Wiedmer >Priority: Minor > > A file named "`" was introduced during the Highcharts removal. See > https://github.com/apache/incubator-airflow/commit/0a460081bc7cba2d05434148f092b87d35aa8cd3 > My best assessment, is that this was a temporary file created by mistake. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Resolved] (AIRFLOW-264) Adding support for Hive queues.
[ https://issues.apache.org/jira/browse/AIRFLOW-264?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-264. Resolution: Fixed > Adding support for Hive queues. > --- > > Key: AIRFLOW-264 > URL: https://issues.apache.org/jira/browse/AIRFLOW-264 > Project: Apache Airflow > Issue Type: Improvement > Components: hive_hooks > Reporter: Arthur Wiedmer >Assignee: Arthur Wiedmer >Priority: Minor > > Hive allows for queues to be set for workload management. We have started > using them for multi-tenant management on our Hive cluster. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (AIRFLOW-264) Adding support for Hive queues.
[ https://issues.apache.org/jira/browse/AIRFLOW-264?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer updated AIRFLOW-264: --- Fix Version/s: Airflow 1.8 > Adding support for Hive queues. > --- > > Key: AIRFLOW-264 > URL: https://issues.apache.org/jira/browse/AIRFLOW-264 > Project: Apache Airflow > Issue Type: Improvement > Components: hive_hooks > Reporter: Arthur Wiedmer >Assignee: Arthur Wiedmer >Priority: Minor > Fix For: Airflow 1.8 > > > Hive allows for queues to be set for workload management. We have started > using them for multi-tenant management on our Hive cluster. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (AIRFLOW-323) Should be able to prevent tasks from overlapping across multiple DAG Runs
[ https://issues.apache.org/jira/browse/AIRFLOW-323?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15371291#comment-15371291 ] Arthur Wiedmer commented on AIRFLOW-323: Hi Isaac, it sounds like there are a couple of things that could help you: 1) You can set max_active_runs for the DAG to 1 to ensure that only one dag run is active at a time. In this case, only one dag run will be executed at a time. 2) You can set depends_on_past to True such that this task will not execute unless the previous one completes. 3) Finally, you can make this DAG use a pool with one slot, such that this task basically takes a lock on this particular resource. Though ideally, if several tasks are competing for the same resource, you might not want to schedule them at a cadence that will introduce contention... > Should be able to prevent tasks from overlapping across multiple DAG Runs > - > > Key: AIRFLOW-323 > URL: https://issues.apache.org/jira/browse/AIRFLOW-323 > Project: Apache Airflow > Issue Type: Bug >Affects Versions: Airflow 1.7.1.2 > Environment: 1.7.1.2 >Reporter: Isaac Steele >Assignee: Isaac Steele > > As the Airflow administrator, > If a task from a previous DAG Run is still running when the next scheduled > run triggers the same task, there should be a way to prevent the tasks from > overlapping. > Otherwise the same code could end up running multiple times simultaneously. > To reproduce: > 1) Create a DAG with a short scheduled interval > 2) Create a task in that DAG to run longer than the interval > Result: Both tasks end up running at the same time. > This can cause tasks to compete for resources as well as duplicating or > overwriting what the other task is doing. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (AIRFLOW-497) Release plans & info
[ https://issues.apache.org/jira/browse/AIRFLOW-497?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15478185#comment-15478185 ] Arthur Wiedmer commented on AIRFLOW-497: Hi Alexander, I think I can leave a quick update here. While the committers and various contributors have worked on several improvements, we have been blocked on navigating our first apache release (a decent amount of contributors are new to this process and it takes a little getting used to). The main issues that the next release will address are licensing issues, stripping out components that were not compatible with the Apache License as well as a few bug fixes. We hope to be able to release more often in the future once we document the release process internally and make sure we are starting with the right base to be a successful project under the Apache umbrella. A general idea of the improvement roadmap can be found on the wiki : https://cwiki.apache.org/confluence/display/AIRFLOW/Roadmap Feel free to ping the dev mailing list also if you have more questions or want to start a conversation about releases. Best, Arthur > Release plans & info > > > Key: AIRFLOW-497 > URL: https://issues.apache.org/jira/browse/AIRFLOW-497 > Project: Apache Airflow > Issue Type: Wish > Components: core, docs >Reporter: Alexander Kachkaev >Priority: Minor > Labels: build, newbie, release > > I did a couple of experiments with airflow several months ago and returned to > explore it properly this week. After a few days of quite intensive reading > and hacking it still remains unclear to me what's going on with the project > ATM. > The latest release is 1.7.1.3, which dates back to 2016-06-13 (three months > from now). The docs on pythonhosted sometimes refer to 1.8 and git blame > reveals that these mentionings have been there since at least April 2016. > JIRA's dashboard has references to versions 1.8 and 2.0, but those only > contain lists with issues - no deadline etc. 
> I imagine that core developers have a clear picture about the situation and > it is probably possible to figure things out from the mailing list and > gitter. However, it would be good to see the roadmap etc. in a slightly more > accessible way. > More frequent releases will help a lot as well. I'm seeing some issues when > running 1.7.1.3 via docker-airflow / celery, but it's totally unclear whether > these still exist on airflow's master branch or even something's wrong with > the docker wrapper I'm using. Opening an issue in JIRA seems somewhat stupid > in this situation. > Could anyone please increase the clarity of meta? -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Deleted] (AIRFLOW-555) !!!@ +1~855~338~0710 Xbox Livw One / 360 Customer Service Help Support Number
[ https://issues.apache.org/jira/browse/AIRFLOW-555?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer deleted AIRFLOW-555: --- > !!!@ +1~855~338~0710 Xbox Livw One / 360 Customer Service Help Support Number > - > > Key: AIRFLOW-555 > URL: https://issues.apache.org/jira/browse/AIRFLOW-555 > Project: Apache Airflow > Issue Type: Improvement > Environment: Xbox Live Solution Windows, Mac, Xbox One, Xbox 360 >Reporter: Techie James >Assignee: Techie James >Priority: Trivial > Labels: documentation, easyfix, features, windows > Original Estimate: 1h > Remaining Estimate: 1h > > http://www.xboxsupportnumber.com/ > +1-855-338-0710 Xbox Live Support Number > [Xbox Live Support Number 1 855 338 0710][1] > We're happy to help you 24/7 call us 1 855 338 0710 > Xbox Live Support Number 1 855 338 0710 > http://xboxlivesupportnume.site123.me/ > Manage your subscriptions > Do you need to manage your Xbox subscription? To manage your subscription, > sign in to your Microsoft account. From there, you can check your > subscription status, turn off auto renewal, or change how you pay for your > subscription. 
> Contact us Xbox Live Support Number 1 855 338 0710 > Trending topics > I need to manage my subscription > How a parent can change the privacy and online safety settings for a child > account > How to connect an Xbox One Controller to a Windows PC > How to set up an Xbox 360 Controller for Windows > Recover and reset your lost Microsoft account password > Connect a wireless Xbox One controller to your console > "I can't hear my friends online, or I can't join or host a multiplayer game > with my Xbox One" > Contact us Xbox Live Support Number 1 855 338 0710 > Community Support > Ambassador Forum Access Support > Networking Hardware Information > TV Hardware Information > Xbox 360 Support > Xbox Error Codes > Xbox on Mobile Devices Support > Xbox on Windows Support > Xbox One Support > Xbox Rewards Support > Xbox Live Support Number 1 855 338 0710 -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Created] (AIRFLOW-575) Improve tutorial information about default_args
Arthur Wiedmer created AIRFLOW-575: -- Summary: Improve tutorial information about default_args Key: AIRFLOW-575 URL: https://issues.apache.org/jira/browse/AIRFLOW-575 Project: Apache Airflow Issue Type: Improvement Components: Documentation Reporter: Laura Lorenz Assignee: Laura Lorenz Priority: Minor -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Resolved] (AIRFLOW-575) Improve tutorial information about default_args
[ https://issues.apache.org/jira/browse/AIRFLOW-575?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-575. Resolution: Fixed > Improve tutorial information about default_args > --- > > Key: AIRFLOW-575 > URL: https://issues.apache.org/jira/browse/AIRFLOW-575 > Project: Apache Airflow > Issue Type: Improvement > Components: Documentation >Reporter: Laura Lorenz >Assignee: Laura Lorenz >Priority: Minor > -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (AIRFLOW-682) Bump MAX_PERIODS
[ https://issues.apache.org/jira/browse/AIRFLOW-682?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15729673#comment-15729673 ] Arthur Wiedmer commented on AIRFLOW-682: +1. Very useful for large-ish DAGs > 1k tasks as this limit applies also for the max number of tasks when marking upstream or downstream success. > Bump MAX_PERIODS > > > Key: AIRFLOW-682 > URL: https://issues.apache.org/jira/browse/AIRFLOW-682 > Project: Apache Airflow > Issue Type: Bug >Reporter: Dan Davydov >Assignee: Dan Davydov > > It is not possible to mark success on some large DAGs due to the MAX_PERIODS > being set to 1000. We should temporarily bump it up until work can be done to > scale the mark success endpoint much higher. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Created] (AIRFLOW-731) NamedHivePartitionSensor chokes on partition predicate with periods.
Arthur Wiedmer created AIRFLOW-731: -- Summary: NamedHivePartitionSensor chokes on partition predicate with periods. Key: AIRFLOW-731 URL: https://issues.apache.org/jira/browse/AIRFLOW-731 Project: Apache Airflow Issue Type: Bug Affects Versions: Airflow 1.7.1, Airflow 1.7.0 Reporter: Arthur Wiedmer Assignee: Arthur Wiedmer Priority: Trivial The partition parsing function did not limit splitting to the first period, leading to issues. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (AIRFLOW-1196) Make trigger_dag_id a templated field of TriggerDagRunOperator
[ https://issues.apache.org/jira/browse/AIRFLOW-1196?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Vigil updated AIRFLOW-1196: -- Affects Version/s: (was: Airflow 1.8) Airflow 2.0 > Make trigger_dag_id a templated field of TriggerDagRunOperator > -- > > Key: AIRFLOW-1196 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1196 > Project: Apache Airflow > Issue Type: Improvement > Components: operators > Reporter: Arthur Vigil >Assignee: Arthur Vigil >Priority: Trivial > Labels: easyfix, improvement > > TriggerDagRunOperator currently has no templated fields. Adding > `trigger_dag_id` as a templated field should be a trivial change that > improves its flexibility and usefulness. -- This message was sent by Atlassian JIRA (v6.4.14#64029)
[jira] [Updated] (AIRFLOW-1196) Make trigger_dag_id a templated field of TriggerDagRunOperator
[ https://issues.apache.org/jira/browse/AIRFLOW-1196?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Vigil updated AIRFLOW-1196: -- Affects Version/s: (was: Airflow 2.0) > Make trigger_dag_id a templated field of TriggerDagRunOperator > -- > > Key: AIRFLOW-1196 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1196 > Project: Apache Airflow > Issue Type: Improvement > Components: operators > Reporter: Arthur Vigil >Assignee: Arthur Vigil >Priority: Trivial > Labels: easyfix, improvement > > TriggerDagRunOperator currently has no templated fields. Adding > `trigger_dag_id` as a templated field should be a trivial change that > improves its flexibility and usefulness. -- This message was sent by Atlassian JIRA (v6.4.14#64029)
[jira] [Created] (AIRFLOW-1667) Remote log handlers don't upload logs
Arthur Vigil created AIRFLOW-1667: - Summary: Remote log handlers don't upload logs Key: AIRFLOW-1667 URL: https://issues.apache.org/jira/browse/AIRFLOW-1667 Project: Apache Airflow Issue Type: Bug Components: logging Affects Versions: 1.9.0, 1.10.0 Reporter: Arthur Vigil AIRFLOW-1385 revised logging for configurability, but the provided remote log handlers (S3TaskHandler and GCSTaskHandler) only upload on close (flush is left at the default implementation provided by `logging.FileHandler`). A handler will be closed on process exit by `logging.shutdown()`, but depending on the Executor used worker processes may not regularly shutdown, and can very likely persist between tasks. This means during normal execution log files are never uploaded. Need to find a way to flush remote log handlers in a timely manner, but without hitting the target resources unnecessarily. -- This message was sent by Atlassian JIRA (v6.4.14#64029)
[jira] [Created] (AIRFLOW-1682) S3 task handler never writes to S3
Arthur Vigil created AIRFLOW-1682: - Summary: S3 task handler never writes to S3 Key: AIRFLOW-1682 URL: https://issues.apache.org/jira/browse/AIRFLOW-1682 Project: Apache Airflow Issue Type: Bug Affects Versions: 1.9.0 Reporter: Arthur Vigil Assignee: Arthur Vigil S3TaskHandler has the same problem as the GCSTaskHandler reported in AIRFLOW-1676, where the log never gets uploaded because _hook is never set -- This message was sent by Atlassian JIRA (v6.4.14#64029)
[jira] [Commented] (AIRFLOW-1669) Fix Docker import in Master
[ https://issues.apache.org/jira/browse/AIRFLOW-1669?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16195510#comment-16195510 ] Arthur Vigil commented on AIRFLOW-1669: --- master is building but the docker import is still wrong which means DockerOperator is broken. The correct import if using the older docker-py library is {noformat} from docker import Client, tls {noformat} > Fix Docker import in Master > --- > > Key: AIRFLOW-1669 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1669 > Project: Apache Airflow > Issue Type: Bug >Reporter: Fokko Driesprong > Fix For: 1.9.0, 1.8.3 > > > Hi all, > Currently master is failing due a wrong dependency. I would like to revert > this and move back to the docker dependency. > Cheers, Fokko -- This message was sent by Atlassian JIRA (v6.4.14#64029)
[jira] [Commented] (AIRFLOW-1669) Fix Docker import in Master
[ https://issues.apache.org/jira/browse/AIRFLOW-1669?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16195548#comment-16195548 ] Arthur Vigil commented on AIRFLOW-1669: --- I think I have narrowed down the issue to a stale Travis cache. The AIRFLOW-1368 revert went fine, but Travis did not invalidate the cached wheel `docker` package was being installed from. This created the appearance of a bad import because the unit tests were being run with a different version of the docker package Should be fixed in https://github.com/apache/incubator-airflow/pull/2672 once the Travis cache is cleared > Fix Docker import in Master > --- > > Key: AIRFLOW-1669 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1669 > Project: Apache Airflow > Issue Type: Bug >Reporter: Fokko Driesprong > Fix For: 1.9.0, 1.8.3 > > > Hi all, > Currently master is failing due a wrong dependency. I would like to revert > this and move back to the docker dependency. > Cheers, Fokko -- This message was sent by Atlassian JIRA (v6.4.14#64029)
[jira] [Updated] (AIRFLOW-1667) Remote log handlers don't upload logs on task finish
[ https://issues.apache.org/jira/browse/AIRFLOW-1667?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Vigil updated AIRFLOW-1667: -- Summary: Remote log handlers don't upload logs on task finish (was: Remote log handlers don't upload logs) > Remote log handlers don't upload logs on task finish > > > Key: AIRFLOW-1667 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1667 > Project: Apache Airflow > Issue Type: Bug > Components: logging >Affects Versions: 1.9.0, 1.10.0 >Reporter: Arthur Vigil > > AIRFLOW-1385 revised logging for configurability, but the provided remote log > handlers (S3TaskHandler and GCSTaskHandler) only upload on close (flush is > left at the default implementation provided by `logging.FileHandler`). A > handler will be closed on process exit by `logging.shutdown()`, but depending > on the Executor used worker processes may not regularly shutdown, and can > very likely persist between tasks. This means during normal execution log > files are never uploaded. > Need to find a way to flush remote log handlers in a timely manner, but > without hitting the target resources unnecessarily. -- This message was sent by Atlassian JIRA (v6.4.14#64029)
[jira] [Commented] (AIRFLOW-1667) Remote log handlers don't upload logs on task finish
[ https://issues.apache.org/jira/browse/AIRFLOW-1667?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16195609#comment-16195609 ] Arthur Vigil commented on AIRFLOW-1667: --- AIRFLOW-1676 and AIRFLOW-1682 actually fix an underlying problem with the remote log handlers that was causing logs to _never_ be uploaded, so the problem isn't quite as bad as I thought. It would still be nice if we could replicate the behavior of the old logging system in getting task logs pushed in a timely manner as right now the timing is unpredictable. > Remote log handlers don't upload logs on task finish > > > Key: AIRFLOW-1667 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1667 > Project: Apache Airflow > Issue Type: Bug > Components: logging >Affects Versions: 1.9.0, 1.10.0 >Reporter: Arthur Vigil > > AIRFLOW-1385 revised logging for configurability, but the provided remote log > handlers (S3TaskHandler and GCSTaskHandler) only upload on close (flush is > left at the default implementation provided by `logging.FileHandler`). A > handler will be closed on process exit by `logging.shutdown()`, but depending > on the Executor used worker processes may not regularly shutdown, and can > very likely persist between tasks. This means during normal execution log > files are never uploaded. > Need to find a way to flush remote log handlers in a timely manner, but > without hitting the target resources unnecessarily. -- This message was sent by Atlassian JIRA (v6.4.14#64029)
[jira] [Resolved] (AIRFLOW-1235) Odd behaviour when all gunicorn workers die
[ https://issues.apache.org/jira/browse/AIRFLOW-1235?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Arthur Wiedmer resolved AIRFLOW-1235. - Resolution: Fixed Fix Version/s: 2.0.0 Issue resolved by pull request #2330 [https://github.com/apache/incubator-airflow/pull/2330] > Odd behaviour when all gunicorn workers die > --- > > Key: AIRFLOW-1235 > URL: https://issues.apache.org/jira/browse/AIRFLOW-1235 > Project: Apache Airflow > Issue Type: Bug > Components: webserver >Affects Versions: 1.8.0 >Reporter: Erik Forsberg >Assignee: Kengo Seki >Priority: Major > Fix For: 2.0.0 > > > The webserver has sometimes stopped responding to port 443, and today I found > the issue - I had a misconfigured resolv.conf that made it unable to talk to > my postgresql. This was the root cause, but the way airflow webserver behaved > was a bit odd. > It seems that when all gunicorn workers failed to start, the gunicorn master > shut down. However, the main process (the one that starts gunicorn master) > did not shut down, so there was no way of detecting the failed status of > webserver from e.g. systemd or init script. 
> Full traceback leading to stale webserver process: > {noformat} > May 21 09:51:57 airmaster01 airflow[26451]: [2017-05-21 09:51:57 +] > [23794] [ERROR] Exception in worker process: > May 21 09:51:57 airmaster01 airflow[26451]: Traceback (most recent call last): > May 21 09:51:57 airmaster01 airflow[26451]: File > "/opt/airflow/production/lib/python3.4/site-packages/sqlalchemy/pool.py", > line 1122, in _do_get > May 21 09:51:57 airmaster01 airflow[26451]: return self._pool.get(wait, > self._timeout) > May 21 09:51:57 airmaster01 airflow[26451]: File > "/opt/airflow/production/lib/python3.4/site-packages/sqlalchemy/util/queue.py", > line 145, in get > May 21 09:51:57 airmaster01 airflow[26451]: raise Empty > May 21 09:51:57 airmaster01 airflow[26451]: sqlalchemy.util.queue.Empty > May 21 09:51:57 airmaster01 airflow[26451]: During handling of the above > exception, another exception occurred: > May 21 09:51:57 airmaster01 airflow[26451]: Traceback (most recent call last): > May 21 09:51:57 airmaster01 airflow[26451]: File > "/opt/airflow/production/lib/python3.4/site-packages/sqlalchemy/engine/base.py", > line 2147, in _wrap_pool_connect > May 21 09:51:57 airmaster01 airflow[26451]: return fn() > May 21 09:51:57 airmaster01 airflow[26451]: File > "/opt/airflow/production/lib/python3.4/site-packages/sqlalchemy/pool.py", > line 387, in connect > May 21 09:51:57 airmaster01 airflow[26451]: return > _ConnectionFairy._checkout(self) > May 21 09:51:57 airmaster01 airflow[26451]: File > "/opt/airflow/production/lib/python3.4/site-packages/sqlalchemy/pool.py", > line 766, in _checkout > May 21 09:51:57 airmaster01 airflow[26451]: fairy = > _ConnectionRecord.checkout(pool) > May 21 09:51:57 airmaster01 airflow[26451]: File > "/opt/airflow/production/lib/python3.4/site-packages/sqlalchemy/pool.py", > line 516, in checkout > May 21 09:51:57 airmaster01 airflow[26451]: rec = pool._do_get() > May 21 09:51:57 airmaster01 airflow[26451]: File > 
"/opt/airflow/production/lib/python3.4/site-packages/sqlalchemy/pool.py", > line 1138, in _do_get > May 21 09:51:57 airmaster01 airflow[26451]: self._dec_overflow() > May 21 09:51:57 airmaster01 airflow[26451]: File > "/opt/airflow/production/lib/python3.4/site-packages/sqlalchemy/util/langhelpers.py", > line 66, in __exit__ > May 21 09:51:57 airmaster01 airflow[26451]: compat.reraise(exc_type, > exc_value, exc_tb) > May 21 09:51:57 airmaster01 airflow[26451]: File > "/opt/airflow/production/lib/python3.4/site-packages/sqlalchemy/util/compat.py", > line 187, in reraise > May 21 09:51:57 airmaster01 airflow[26451]: raise value > May 21 09:51:57 airmaster01 airflow[26451]: File > "/opt/airflow/production/lib/python3.4/site-packages/sqlalchemy/pool.py", > line 1135, in _do_get > May 21 09:51:57 airmaster01 airflow[26451]: return self._create_connection() > May 21 09:51:57 airmaster01 airflow[26451]: File > "/opt/airflow/production/lib/python3.4/site-packages/sqlalchemy/pool.py", > line 333, in _create_connection > May 21 09:51:57 airmaster01 airflow[26451]: return _ConnectionRecord(self) > May 21 09:51:57 airmaster01 airflow[26451]: File > "/opt/airflow/production/lib/python3.4/site-packages/sqlalchemy/pool.py", > line 461, in __init__ > May 21 09:51:57 ai