This is an automated email from the ASF dual-hosted git repository.

kaxilnaik pushed a commit to branch v1-10-test
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/v1-10-test by this push:
     new dd0f252  Webserver: Sanitize string passed to origin param (#14738)
dd0f252 is described below

commit dd0f252402eafb20d04d9b0cb9edce8d03dfd479
Author: Kaxil Naik <[email protected]>
AuthorDate: Fri Mar 12 15:25:02 2021 +0000

    Webserver: Sanitize string passed to origin param (#14738)
    
    (cherry-picked from 409c249121bd9c8902fc2ba551b21873ab41f953)
---
 airflow/www/views.py         | 12 +++++++++++-
 airflow/www_rbac/views.py    | 13 ++++++++++++-
 tests/www/test_views.py      |  5 +++++
 tests/www_rbac/test_views.py | 36 +++++++++++++++++++++++++-----------
 4 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/airflow/www/views.py b/airflow/www/views.py
index f11fb84..b269454 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -340,8 +340,18 @@ def get_safe_url(url):
 
         parsed = urlparse(url)
 
+        # If the url is relative & it contains semicolon, redirect it to 
homepage to avoid
+        # potential XSS. (Similar to 
https://github.com/python/cpython/pull/24297/files (bpo-42967))
+        if parsed.netloc == '' and parsed.scheme == '' and ';' in unquote(url):
+            return "/admin/"
+
         query = parse_qsl(parsed.query, keep_blank_values=True)
-        url = parsed._replace(query=urlencode(query)).geturl()
+
+        # Remove all the query elements containing semicolon
+        # As part of https://github.com/python/cpython/pull/24297/files 
(bpo-42967)
+        # semicolon was already removed as a separator for query arguments by 
default
+        sanitized_query = [query_arg for query_arg in query if ';' not in 
query_arg[1]]
+        url = parsed._replace(query=urlencode(sanitized_query)).geturl()
         if parsed.scheme in valid_schemes and parsed.netloc in valid_netlocs:
             return url
     except Exception as e:  # pylint: disable=broad-except
diff --git a/airflow/www_rbac/views.py b/airflow/www_rbac/views.py
index 324ede3..3ca32c20 100644
--- a/airflow/www_rbac/views.py
+++ b/airflow/www_rbac/views.py
@@ -100,8 +100,19 @@ def get_safe_url(url):
 
         parsed = urlparse(url)
 
+        # If the url is relative & it contains semicolon, redirect it to 
homepage to avoid
+        # potential XSS. (Similar to 
https://github.com/python/cpython/pull/24297/files (bpo-42967))
+        if parsed.netloc == '' and parsed.scheme == '' and ';' in unquote(url):
+            return url_for('Airflow.index')
+
         query = parse_qsl(parsed.query, keep_blank_values=True)
-        url = parsed._replace(query=urlencode(query)).geturl()
+
+        # Remove all the query elements containing semicolon
+        # As part of https://github.com/python/cpython/pull/24297/files 
(bpo-42967)
+        # semicolon was already removed as a separator for query arguments by 
default
+        sanitized_query = [query_arg for query_arg in query if ';' not in 
query_arg[1]]
+        url = parsed._replace(query=urlencode(sanitized_query)).geturl()
+
         if parsed.scheme in valid_schemes and parsed.netloc in valid_netlocs:
             return url
     except Exception as e:  # pylint: disable=broad-except
diff --git a/tests/www/test_views.py b/tests/www/test_views.py
index 671c46f..f3fbd9d 100644
--- a/tests/www/test_views.py
+++ b/tests/www/test_views.py
@@ -1119,6 +1119,11 @@ class TestTriggerDag(unittest.TestCase):
     @parameterized.expand([
         ("javascript:alert(1)", "/admin/"),
         ("http://google.com";, "/admin/"),
+        ("36539'%3balert(1)%2f%2f166", "/admin/"),
+        (
+            
"%2Fadmin%2Fairflow%2Ftree%3Fdag_id%3Dexample_bash_operator';alert(33)//",
+            "/admin/",
+        ),
         (
             "%2Fadmin%2Fairflow%2Ftree%3Fdag_id%3Dexample_bash_operator"
             "&dag_id=example_bash_operator';alert(33)//",
diff --git a/tests/www_rbac/test_views.py b/tests/www_rbac/test_views.py
index 6880e42..d91eeed 100644
--- a/tests/www_rbac/test_views.py
+++ b/tests/www_rbac/test_views.py
@@ -41,7 +41,7 @@ from werkzeug.test import Client
 from werkzeug.wrappers import BaseResponse
 
 from airflow import models, settings, version
-from airflow.configuration import conf
+from airflow.configuration import conf, WEBSERVER_CONFIG, 
_read_default_config_file
 from airflow.config_templates.airflow_local_settings import 
DEFAULT_LOGGING_CONFIG
 from airflow.executors.celery_executor import CeleryExecutor
 from airflow.jobs import BaseJob
@@ -66,15 +66,26 @@ from tests.test_utils.db import clear_db_runs
 class TestBase(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        cls.orig_rbac_conf = conf.get('webserver', 'rbac')
+        conf.set('webserver', 'rbac', 'True')
+        cls._create_default_webserver_config()
         cls.app, cls.appbuilder = application.create_app(session=Session, 
testing=True)
         cls.app.config['WTF_CSRF_ENABLED'] = False
         cls.app.jinja_env.undefined = jinja2.StrictUndefined
         settings.configure_orm()
         cls.session = Session
 
+    @staticmethod
+    def _create_default_webserver_config():
+        if not os.path.isfile(WEBSERVER_CONFIG):
+            DEFAULT_WEBSERVER_CONFIG, _ = 
_read_default_config_file('default_webserver_config.py')
+            with open(WEBSERVER_CONFIG, 'w') as file:
+                file.write(DEFAULT_WEBSERVER_CONFIG)
+
     @classmethod
     def tearDownClass(cls):
         clear_db_runs()
+        conf.set('webserver', 'rbac', cls.orig_rbac_conf)
 
     def setUp(self):
         self.client = self.app.test_client()
@@ -2244,16 +2255,19 @@ class TestTriggerDag(TestBase):
         self.check_content_in_response(
             'Triggered example_bash_operator, it should start any moment 
now.', response)
 
-    @parameterized.expand([
-        ("javascript:alert(1)", "/home"),
-        ("http://google.com";, "/home"),
-        (
-            "%2Ftree%3Fdag_id%3Dexample_bash_operator';alert(33)//",
-            "/tree?dag_id=example_bash_operator%27&amp;alert%2833%29%2F%2F=",
-        ),
-        ("%2Ftree%3Fdag_id%3Dexample_bash_operator", 
"/tree?dag_id=example_bash_operator"),
-        ("%2Fgraph%3Fdag_id%3Dexample_bash_operator", 
"/graph?dag_id=example_bash_operator"),
-    ])
+    @parameterized.expand(
+        [
+            ("javascript:alert(1)", "/home"),
+            ("http://google.com";, "/home"),
+            ("36539'%3balert(1)%2f%2f166", "/home"),
+            (
+                "%2Ftree%3Fdag_id%3Dexample_bash_operator';alert(33)//",
+                "/home",
+            ),
+            ("%2Ftree%3Fdag_id%3Dexample_bash_operator", 
"/tree?dag_id=example_bash_operator"),
+            ("%2Fgraph%3Fdag_id%3Dexample_bash_operator", 
"/graph?dag_id=example_bash_operator"),
+        ]
+    )
     def test_trigger_dag_form_origin_url(self, test_origin, expected_origin):
         test_dag_id = "example_bash_operator"
 

Reply via email to