This is an automated email from the ASF dual-hosted git repository.

utkarsharma pushed a commit to branch v2-9-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit 91c02b589916231b2d416dcb2174e3442e40669a
Author: Amogh Desai <[email protected]>
AuthorDate: Fri Jun 28 17:10:56 2024 +0530

    Validating provider description for urls in provider list view (#40475)
    
    * Validating provider description for urls in provider list view
    
    * adding unit tests
    
    ---------
    
    Co-authored-by: adesai <[email protected]>
    (cherry picked from commit f18f48492dc69f392e45567580b6ddb0c070ea58)
---
 airflow/www/views.py          |  9 ++++++++-
 tests/www/views/test_views.py | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/airflow/www/views.py b/airflow/www/views.py
index 3faed00a38..781f1b199b 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -36,7 +36,7 @@ from functools import cached_property
 from json import JSONDecodeError
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Collection, Iterator, Mapping, MutableMapping, Sequence
-from urllib.parse import unquote, urljoin, urlsplit
+from urllib.parse import unquote, urljoin, urlparse, urlsplit
 
 import configupdater
 import flask.json
@@ -4321,6 +4321,13 @@ class ProviderView(AirflowBaseView):
         def _build_link(match_obj):
             text = match_obj.group(1)
             url = match_obj.group(2)
+
+            # parsing the url to check if it is a valid url
+            parsed_url = urlparse(url)
+            if not (parsed_url.scheme == "http" or parsed_url.scheme == "https"):
+                # returning the original raw text
+                return escape(match_obj.group(0))
+
             return Markup(f'<a href="{url}">{text}</a>')
 
         cd = escape(description)
diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py
index 27f096403f..2a7412ff3b 100644
--- a/tests/www/views/test_views.py
+++ b/tests/www/views/test_views.py
@@ -22,6 +22,7 @@ import re
 from unittest import mock
 
 import pytest
+from markupsafe import Markup
 
 from airflow.configuration import (
     initialize_config,
@@ -31,6 +32,7 @@ from airflow.configuration import (
 from airflow.plugins_manager import AirflowPlugin, EntryPointSource
 from airflow.utils.task_group import TaskGroup
 from airflow.www.views import (
+    ProviderView,
     get_key_paths,
     get_safe_url,
     get_task_stats_from_query,
@@ -139,6 +141,37 @@ def test_should_list_providers_on_page_with_details(admin_client):
     check_content_in_response("Providers", resp)
 
 
[email protected](
+    "provider_description, expected",
+    [
+        ("`Airbyte <https://airbyte.com/>`__", Markup('<a href="https://airbyte.com/">Airbyte</a>')),
+        (
+            "Amazon integration (including `Amazon Web Services (AWS) <https://aws.amazon.com/>`__).",
+            Markup(
+                'Amazon integration (including <a href="https://aws.amazon.com/">Amazon Web Services ('
+                "AWS)</a>)."
+            ),
+        ),
+        (
+            "`Java Database Connectivity (JDBC) <https://docs.oracle.com/javase/8/docs/technotes/guides/jdbc"
+            "/>`__",
+            Markup(
+                '<a href="https://docs.oracle.com/javase/8/docs/technotes/guides/jdbc/">Java '
+                "Database Connectivity (JDBC)</a>"
+            ),
+        ),
+        (
+            "`click me <javascript:prompt(document.domain)>`__",
+            Markup("`click me &lt;javascript:prompt(document.domain)&gt;`__"),
+        ),
+    ],
+)
+def test__clean_description(admin_client, provider_description, expected):
+    p = ProviderView()
+    actual = p._clean_description(provider_description)
+    assert actual == expected
+
+
 def test_endpoint_should_not_be_unauthenticated(app):
     resp = app.test_client().get("/provider", follow_redirects=True)
     check_content_not_in_response("Providers", resp)

Reply via email to