This is an automated email from the ASF dual-hosted git repository. utkarsharma pushed a commit to branch v2-9-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 91c02b589916231b2d416dcb2174e3442e40669a Author: Amogh Desai <[email protected]> AuthorDate: Fri Jun 28 17:10:56 2024 +0530 Validating provider description for urls in provider list view (#40475) * Validating provider description for urls in provider list view * adding unit tests --------- Co-authored-by: adesai <[email protected]> (cherry picked from commit f18f48492dc69f392e45567580b6ddb0c070ea58) --- airflow/www/views.py | 9 ++++++++- tests/www/views/test_views.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/airflow/www/views.py b/airflow/www/views.py index 3faed00a38..781f1b199b 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -36,7 +36,7 @@ from functools import cached_property from json import JSONDecodeError from pathlib import Path from typing import TYPE_CHECKING, Any, Collection, Iterator, Mapping, MutableMapping, Sequence -from urllib.parse import unquote, urljoin, urlsplit +from urllib.parse import unquote, urljoin, urlparse, urlsplit import configupdater import flask.json @@ -4321,6 +4321,13 @@ class ProviderView(AirflowBaseView): def _build_link(match_obj): text = match_obj.group(1) url = match_obj.group(2) + + # parsing the url to check if ita a valid url + parsed_url = urlparse(url) + if not (parsed_url.scheme == "http" or parsed_url.scheme == "https"): + # returning the original raw text + return escape(match_obj.group(0)) + return Markup(f'<a href="{url}">{text}</a>') cd = escape(description) diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py index 27f096403f..2a7412ff3b 100644 --- a/tests/www/views/test_views.py +++ b/tests/www/views/test_views.py @@ -22,6 +22,7 @@ import re from unittest import mock import pytest +from markupsafe import Markup from airflow.configuration import ( initialize_config, @@ -31,6 +32,7 @@ from airflow.configuration import ( from airflow.plugins_manager import AirflowPlugin, EntryPointSource from 
airflow.utils.task_group import TaskGroup from airflow.www.views import ( + ProviderView, get_key_paths, get_safe_url, get_task_stats_from_query, @@ -139,6 +141,37 @@ def test_should_list_providers_on_page_with_details(admin_client): check_content_in_response("Providers", resp) +@pytest.mark.parametrize( + "provider_description, expected", + [ + ("`Airbyte <https://airbyte.com/>`__", Markup('<a href="https://airbyte.com/">Airbyte</a>')), + ( + "Amazon integration (including `Amazon Web Services (AWS) <https://aws.amazon.com/>`__).", + Markup( + 'Amazon integration (including <a href="https://aws.amazon.com/">Amazon Web Services (' + "AWS)</a>)." + ), + ), + ( + "`Java Database Connectivity (JDBC) <https://docs.oracle.com/javase/8/docs/technotes/guides/jdbc" + "/>`__", + Markup( + '<a href="https://docs.oracle.com/javase/8/docs/technotes/guides/jdbc/">Java ' + "Database Connectivity (JDBC)</a>" + ), + ), + ( + "`click me <javascript:prompt(document.domain)>`__", + Markup("`click me <javascript:prompt(document.domain)>`__"), + ), + ], +) +def test__clean_description(admin_client, provider_description, expected): + p = ProviderView() + actual = p._clean_description(provider_description) + assert actual == expected + + def test_endpoint_should_not_be_unauthenticated(app): resp = app.test_client().get("/provider", follow_redirects=True) check_content_not_in_response("Providers", resp)
