This is an automated email from the ASF dual-hosted git repository. jedcunningham pushed a commit to branch v2-2-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit a0ea47e4ebabcbae399b9063b38c44e1f26bcc4c Author: Jed Cunningham <[email protected]> AuthorDate: Tue Feb 15 10:57:46 2022 -0700 Add a session backend to store session data in the database (#21478) Co-authored-by: Jed Cunningham <[email protected]> (cherry picked from commit da9d0863c7ff121c111a455708163b026943bdf1) --- airflow/config_templates/config.yml | 7 +++ airflow/config_templates/default_airflow.cfg | 4 ++ .../c381b21cb7e4_add_session_table_to_db.py | 54 ++++++++++++++++++ airflow/utils/db.py | 3 + airflow/www/app.py | 3 +- airflow/www/extensions/init_session.py | 63 ++++++++++++--------- .../www/{extensions/init_session.py => session.py} | 29 ++++------ docs/apache-airflow/migrations-ref.rst | 4 +- docs/spelling_wordlist.txt | 1 + setup.cfg | 3 + tests/api_connexion/conftest.py | 7 ++- tests/api_connexion/test_security.py | 4 ++ tests/test_utils/decorators.py | 2 +- tests/utils/test_db.py | 3 + tests/www/views/conftest.py | 1 + tests/www/views/test_session.py | 65 ++++++++++++++++++++++ 16 files changed, 205 insertions(+), 48 deletions(-) diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 6941f03..1e77041 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -999,6 +999,13 @@ type: string example: ~ default: "" + - name: session_backend + description: | + The type of backend used to store web session data, can be 'database' or 'securecookie' + version_added: 2.2.4 + type: string + example: securecookie + default: database - name: web_server_master_timeout description: | Number of seconds the webserver waits before killing gunicorn master that doesn't respond diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg index 6a5449b..826eaf4 100644 --- a/airflow/config_templates/default_airflow.cfg +++ b/airflow/config_templates/default_airflow.cfg @@ -516,6 +516,10 @@ web_server_ssl_cert = # provided SSL will be enabled. This does not change the web server port. web_server_ssl_key = +# The type of backend used to store web session data, can be 'database' or 'securecookie' +# Example: session_backend = securecookie +session_backend = database + # Number of seconds the webserver waits before killing gunicorn master that doesn't respond web_server_master_timeout = 120 diff --git a/airflow/migrations/versions/c381b21cb7e4_add_session_table_to_db.py b/airflow/migrations/versions/c381b21cb7e4_add_session_table_to_db.py new file mode 100644 index 0000000..cc6b9ab --- /dev/null +++ b/airflow/migrations/versions/c381b21cb7e4_add_session_table_to_db.py @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""add session table to db + +Revision ID: c381b21cb7e4 +Revises: be2bfac3da23 +Create Date: 2022-01-25 13:56:35.069429 + +""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = 'c381b21cb7e4' +down_revision = 'be2bfac3da23' +branch_labels = None +depends_on = None + +TABLE_NAME = 'session' + + +def upgrade(): + """Apply add session table to db""" + op.create_table( + TABLE_NAME, + sa.Column('id', sa.Integer()), + sa.Column('session_id', sa.String(255)), + sa.Column('data', sa.LargeBinary()), + sa.Column('expiry', sa.DateTime()), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('session_id'), + ) + + +def downgrade(): + """Unapply add session table to db""" + op.drop_table(TABLE_NAME) diff --git a/airflow/utils/db.py b/airflow/utils/db.py index 023f482..c038d66 100644 --- a/airflow/utils/db.py +++ b/airflow/utils/db.py @@ -954,9 +954,12 @@ def drop_airflow_models(connection): users.drop(settings.engine, checkfirst=True) dag_stats = Table('dag_stats', Base.metadata) dag_stats.drop(settings.engine, checkfirst=True) + session = Table('session', Base.metadata) + session.drop(settings.engine, checkfirst=True) Base.metadata.drop_all(connection) # we remove the Tables here so that if resetdb is run metadata does not keep the old tables. + Base.metadata.remove(session) Base.metadata.remove(dag_stats) Base.metadata.remove(users) Base.metadata.remove(user) diff --git a/airflow/www/app.py b/airflow/www/app.py index 2de041b..16780cb 100644 --- a/airflow/www/app.py +++ b/airflow/www/app.py @@ -36,7 +36,7 @@ from airflow.www.extensions.init_jinja_globals import init_jinja_globals from airflow.www.extensions.init_manifest_files import configure_manifest_files from airflow.www.extensions.init_robots import init_robots from airflow.www.extensions.init_security import init_api_experimental_auth, init_xframe_protection -from airflow.www.extensions.init_session import init_airflow_session_interface, init_permanent_session +from airflow.www.extensions.init_session import init_airflow_session_interface from airflow.www.extensions.init_views import ( init_api_connexion, init_api_experimental, @@ -135,7 +135,6 @@ def create_app(config=None, testing=False): init_jinja_globals(flask_app) init_xframe_protection(flask_app) - init_permanent_session(flask_app) init_airflow_session_interface(flask_app) return flask_app diff --git a/airflow/www/extensions/init_session.py b/airflow/www/extensions/init_session.py index 06e0ba5..7a09de7 100644 --- a/airflow/www/extensions/init_session.py +++ b/airflow/www/extensions/init_session.py @@ -15,33 +15,46 @@ # specific language governing permissions and limitations # under the License. -from flask import request, session as flask_session -from flask.sessions import SecureCookieSessionInterface +from flask import session as builtin_flask_session - -class AirflowSessionInterface(SecureCookieSessionInterface): - """ - Airflow cookie session interface. - Modifications of sessions should be done here because - the change here is global. - """ - - def save_session(self, *args, **kwargs): - """Prevent creating session from REST API requests.""" - if request.blueprint == '/api/v1': - return None - return super().save_session(*args, **kwargs) - - -def init_permanent_session(app): - """Make session permanent to allows us to store data""" - - def make_session_permanent(): - flask_session.permanent = True - - app.before_request(make_session_permanent) +from airflow.configuration import conf +from airflow.exceptions import AirflowConfigException +from airflow.www.session import AirflowDatabaseSessionInterface, AirflowSecureCookieSessionInterface def init_airflow_session_interface(app): """Set airflow session interface""" - app.session_interface = AirflowSessionInterface() + config = app.config.copy() + selected_backend = conf.get('webserver', 'SESSION_BACKEND') + # A bit of a misnomer - normally cookies expire whenever the browser is closed + # or when they hit their expiry datetime, whichever comes first. "Permanent" + # cookies only expire when they hit their expiry datetime, and can outlive + # the browser being closed. + permanent_cookie = config.get('SESSION_PERMANENT', True) + + if selected_backend == 'securecookie': + app.session_interface = AirflowSecureCookieSessionInterface() + if permanent_cookie: + + def make_session_permanent(): + builtin_flask_session.permanent = True + + app.before_request(make_session_permanent) + elif selected_backend == 'database': + app.session_interface = AirflowDatabaseSessionInterface( + app=app, + db=None, + permanent=permanent_cookie, + # Typically these would be configurable with Flask-Session, + # but we will set them explicitly instead as they don't make + # sense to have configurable in Airflow's use case + table='session', + key_prefix='', + use_signer=True, + ) + else: + raise AirflowConfigException( + "Unrecognized session backend specified in " + f"web_server_session_backend: '{selected_backend}'. Please set " + "this to either 'database' or 'securecookie'." + ) diff --git a/airflow/www/extensions/init_session.py b/airflow/www/session.py similarity index 59% copy from airflow/www/extensions/init_session.py copy to airflow/www/session.py index 06e0ba5..4092565 100644 --- a/airflow/www/extensions/init_session.py +++ b/airflow/www/session.py @@ -15,33 +15,26 @@ # specific language governing permissions and limitations # under the License. -from flask import request, session as flask_session +from flask import request from flask.sessions import SecureCookieSessionInterface +from flask_session.sessions import SqlAlchemySessionInterface -class AirflowSessionInterface(SecureCookieSessionInterface): - """ - Airflow cookie session interface. - Modifications of sessions should be done here because - the change here is global. - """ +class SesssionExemptMixin: + """Exempt certain blueprints/paths from autogenerated sessions""" def save_session(self, *args, **kwargs): - """Prevent creating session from REST API requests.""" + """Prevent creating session from REST API and health requests.""" if request.blueprint == '/api/v1': return None + if request.path == '/health': + return None return super().save_session(*args, **kwargs) -def init_permanent_session(app): - """Make session permanent to allows us to store data""" - - def make_session_permanent(): - flask_session.permanent = True - - app.before_request(make_session_permanent) +class AirflowDatabaseSessionInterface(SesssionExemptMixin, SqlAlchemySessionInterface): + """Session interface that exempts some routes and stores session data in the database""" -def init_airflow_session_interface(app): - """Set airflow session interface""" - app.session_interface = AirflowSessionInterface() +class AirflowSecureCookieSessionInterface(SesssionExemptMixin, SecureCookieSessionInterface): + """Session interface that exempts some routes and stores session data in a signed cookie""" diff --git a/docs/apache-airflow/migrations-ref.rst b/docs/apache-airflow/migrations-ref.rst index 016c624..8dc1a55 100644 --- a/docs/apache-airflow/migrations-ref.rst +++ b/docs/apache-airflow/migrations-ref.rst @@ -23,7 +23,9 @@ Here's the list of all the Database Migrations that are executed via when you ru +--------------------------------+------------------+-----------------+---------------------------------------------------------------------------------------+ | Revision ID | Revises ID | Airflow Version | Description | +--------------------------------+------------------+-----------------+---------------------------------------------------------------------------------------+ -| ``be2bfac3da23`` (head) | ``7b2661a43ba3`` | ``2.2.3`` | Add has_import_errors column to DagModel | +| ``c381b21cb7e4`` (head) | ``be2bfac3da23`` | ``2.2.4`` | Create a ``session`` table to store web session data | ++--------------------------------+------------------+-----------------+---------------------------------------------------------------------------------------+ +| ``be2bfac3da23`` | ``7b2661a43ba3`` | ``2.2.3`` | Add has_import_errors column to DagModel | +--------------------------------+------------------+-----------------+---------------------------------------------------------------------------------------+ | ``7b2661a43ba3`` | ``142555e44c17`` | ``2.2.0`` | Change ``TaskInstance`` and ``TaskReschedule`` tables from execution_date to run_id. | +--------------------------------+------------------+-----------------+---------------------------------------------------------------------------------------+ diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 5d77e29..ed114b6 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -1222,6 +1222,7 @@ sdk secretRef secretRefs securable +securecookie securityManager seealso seedlist diff --git a/setup.cfg b/setup.cfg index 7ab5c77..8e36d06 100644 --- a/setup.cfg +++ b/setup.cfg @@ -107,6 +107,9 @@ install_requires = flask-appbuilder>=3.3.4, <4.0.0 flask-caching>=1.5.0, <2.0.0 flask-login>=0.3, <0.5 + # Strict upper-bound on the latest release of flask-session, + # as any schema changes will require a migration. + flask-session>=0.3.1, <=0.4.0 flask-wtf>=0.14.3, <0.15 graphviz>=0.12 gunicorn>=20.1.0 diff --git a/tests/api_connexion/conftest.py b/tests/api_connexion/conftest.py index cc92733..9b37b52 100644 --- a/tests/api_connexion/conftest.py +++ b/tests/api_connexion/conftest.py @@ -25,7 +25,12 @@ from tests.test_utils.decorators import dont_initialize_flask_app_submodules @pytest.fixture(scope="session") def minimal_app_for_api(): @dont_initialize_flask_app_submodules( - skip_all_except=["init_appbuilder", "init_api_experimental_auth", "init_api_connexion"] + skip_all_except=[ + "init_appbuilder", + "init_api_experimental_auth", + "init_api_connexion", + "init_airflow_session_interface", + ] ) def factory(): with conf_vars({("api", "auth_backend"): "tests.test_utils.remote_user_api_auth_backend"}): diff --git a/tests/api_connexion/test_security.py b/tests/api_connexion/test_security.py index 244a8a2..68f6d31 100644 --- a/tests/api_connexion/test_security.py +++ b/tests/api_connexion/test_security.py @@ -45,3 +45,7 @@ class TestSession: def test_session_not_created_on_api_request(self): self.client.get("api/v1/dags", environ_overrides={'REMOTE_USER': "test"}) assert all(cookie.name != "session" for cookie in self.client.cookie_jar) + + def test_session_not_created_on_health_endpoint_request(self): + self.client.get("health") + assert all(cookie.name != "session" for cookie in self.client.cookie_jar) diff --git a/tests/test_utils/decorators.py b/tests/test_utils/decorators.py index d08d159..949df63 100644 --- a/tests/test_utils/decorators.py +++ b/tests/test_utils/decorators.py @@ -42,7 +42,7 @@ def dont_initialize_flask_app_submodules(_func=None, *, skip_all_except=None): "sync_appbuilder_roles", "init_jinja_globals", "init_xframe_protection", - "init_permanent_session", + "init_airflow_session_interface", "init_appbuilder", ] diff --git a/tests/utils/test_db.py b/tests/utils/test_db.py index 601dc6f..27fa67b 100644 --- a/tests/utils/test_db.py +++ b/tests/utils/test_db.py @@ -74,6 +74,9 @@ class TestDb(unittest.TestCase): lambda t: (t[0] == 'remove_table' and t[1].name == 'spt_fallback_usg'), lambda t: (t[0] == 'remove_table' and t[1].name == 'MSreplication_options'), lambda t: (t[0] == 'remove_table' and t[1].name == 'spt_fallback_dev'), + # Ignore flask-session table/index + lambda t: (t[0] == 'remove_table' and t[1].name == 'session'), + lambda t: (t[0] == 'remove_index' and t[1].name == 'session_id'), ] for ignore in ignores: diff = [d for d in diff if not ignore(d)] diff --git a/tests/www/views/conftest.py b/tests/www/views/conftest.py index 05fe1e4..f95a814 100644 --- a/tests/www/views/conftest.py +++ b/tests/www/views/conftest.py @@ -55,6 +55,7 @@ def app(examples_dag_bag): "init_flash_views", "init_jinja_globals", "init_plugins", + "init_airflow_session_interface", ] ) def factory(): diff --git a/tests/www/views/test_session.py b/tests/www/views/test_session.py new file mode 100644 index 0000000..9fb6f36 --- /dev/null +++ b/tests/www/views/test_session.py @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest + +from airflow.exceptions import AirflowConfigException +from airflow.www import app +from tests.test_utils.config import conf_vars +from tests.test_utils.decorators import dont_initialize_flask_app_submodules + + +def test_session_cookie_created_on_login(user_client): + assert any(cookie.name == 'session' for cookie in user_client.cookie_jar) + + +def test_session_inaccessible_after_logout(user_client): + session_cookie = next((cookie for cookie in user_client.cookie_jar if cookie.name == 'session'), None) + assert session_cookie is not None + + resp = user_client.get('/logout/') + assert resp.status_code == 302 + + # Try to access /home with the session cookie from earlier + user_client.set_cookie('session', session_cookie.value) + user_client.get('/home/') + assert resp.status_code == 302 + + +def test_invalid_session_backend_option(): + @dont_initialize_flask_app_submodules( + skip_all_except=[ + "init_api_connexion", + "init_appbuilder", + "init_appbuilder_links", + "init_appbuilder_views", + "init_flash_views", + "init_jinja_globals", + "init_plugins", + "init_airflow_session_interface", + ] + ) + def poorly_configured_app_factory(): + with conf_vars({("webserver", "session_backend"): "invalid_value_for_session_backend"}): + return app.create_app(testing=True) + + expected_exc_regex = ( + "^Unrecognized session backend specified in web_server_session_backend: " + r"'invalid_value_for_session_backend'\. Please set this to .+\.$" + ) + with pytest.raises(AirflowConfigException, match=expected_exc_regex): + poorly_configured_app_factory()
