This is an automated email from the ASF dual-hosted git repository. elizabeth pushed a commit to branch 2.1 in repository https://gitbox.apache.org/repos/asf/superset.git
commit 52cdd57dd990e450fc9062ca65d485a3140d66a5 Author: EugeneTorap <[email protected]> AuthorDate: Fri Apr 28 16:36:51 2023 +0300 chore: Use nh3 lib instead of bleach (#23862) --- requirements/base.txt | 8 ++----- setup.cfg | 2 +- setup.py | 2 +- .../dashboard/components/gridComponents/Chart.jsx | 4 ++-- superset/reports/notifications/email.py | 27 ++++++++++++---------- superset/utils/async_query_manager.py | 4 ++-- superset/utils/core.py | 13 ++++++----- tests/unit_tests/notifications/email_tests.py | 5 +++- 8 files changed, 34 insertions(+), 31 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index 1ae306e8f3..a8f7e0c18b 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -23,8 +23,6 @@ bcrypt==4.0.1 # via paramiko billiard==3.6.4.0 # via celery -bleach==3.3.1 - # via apache-superset brotli==1.0.9 # via flask-compress cachelib==0.4.1 @@ -174,6 +172,8 @@ marshmallow-sqlalchemy==0.23.1 # via flask-appbuilder msgpack==1.0.2 # via apache-superset +nh3==0.2.11 + # via apache-superset numpy==1.23.5 # via # apache-superset @@ -181,7 +181,6 @@ numpy==1.23.5 # pyarrow packaging==21.3 # via - # bleach # deprecation pandas==1.5.3 # via apache-superset @@ -252,7 +251,6 @@ simplejson==3.17.3 # via apache-superset six==1.16.0 # via - # bleach # click-repl # isodate # jsonschema @@ -297,8 +295,6 @@ vine==5.0.0 # kombu wcwidth==0.2.5 # via prompt-toolkit -webencodings==0.5.1 - # via bleach werkzeug==2.3.3 # via # apache-superset diff --git a/setup.cfg b/setup.cfg index a9470d51bd..970fd4ca82 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ combine_as_imports = true include_trailing_comma = true line_length = 88 known_first_party = superset -known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resour [...] +known_third_party =alembic,apispec,backoff,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,nh3,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources [...] multi_line_output = 3 order_by_type = false diff --git a/setup.py b/setup.py index d7d028ba93..66314ed9c4 100644 --- a/setup.py +++ b/setup.py @@ -73,7 +73,6 @@ setup( }, install_requires=[ "backoff>=1.8.0", - "bleach>=3.0.2, <4.0.0", "cachelib>=0.4.1,<0.5", "celery>=5.2.2, <6.0.0", "click>=8.0.3", @@ -100,6 +99,7 @@ setup( "isodate", "markdown>=3.0", "msgpack>=1.0.0, <1.1", + "nh3>=0.2.11, <0.3", "numpy==1.23.5", "pandas>=1.5.3, <1.6", "parsedatetime", diff --git a/superset-frontend/src/dashboard/components/gridComponents/Chart.jsx b/superset-frontend/src/dashboard/components/gridComponents/Chart.jsx index 89d4fb94ce..cf06a1d7ed 100644 --- a/superset-frontend/src/dashboard/components/gridComponents/Chart.jsx +++ b/superset-frontend/src/dashboard/components/gridComponents/Chart.jsx @@ -486,10 +486,10 @@ class Chart extends React.Component { {/* This usage of dangerouslySetInnerHTML is safe since it is being used to render - markdown that is sanitized with bleach. See: + markdown that is sanitized with nh3. See: https://github.com/apache/superset/pull/4390 and - https://github.com/apache/superset/commit/b6fcc22d5a2cb7a5e92599ed5795a0169385a825 + https://github.com/apache/superset/pull/23862 */} {isExpanded && slice.description_markeddown && ( <div diff --git a/superset/reports/notifications/email.py b/superset/reports/notifications/email.py index 22b1714f99..10a76e7573 100644 --- a/superset/reports/notifications/email.py +++ b/superset/reports/notifications/email.py @@ -22,7 +22,7 @@ from dataclasses import dataclass from email.utils import make_msgid, parseaddr from typing import Any, Dict, Optional -import bleach +import nh3 from flask_babel import gettext as __ from superset import app @@ -35,10 +35,10 @@ from superset.utils.decorators import statsd_gauge logger = logging.getLogger(__name__) -TABLE_TAGS = ["table", "th", "tr", "td", "thead", "tbody", "tfoot"] -TABLE_ATTRIBUTES = ["colspan", "rowspan", "halign", "border", "class"] +TABLE_TAGS = {"table", "th", "tr", "td", "thead", "tbody", "tfoot"} +TABLE_ATTRIBUTES = {"colspan", "rowspan", "halign", "border", "class"} -ALLOWED_TAGS = [ +ALLOWED_TAGS = { "a", "abbr", "acronym", @@ -54,13 +54,14 @@ ALLOWED_TAGS = [ "p", "strong", "ul", -] + TABLE_TAGS +}.union(TABLE_TAGS) +ALLOWED_TABLE_ATTRIBUTES = {tag: TABLE_ATTRIBUTES for tag in TABLE_TAGS} ALLOWED_ATTRIBUTES = { - "a": ["href", "title"], - "abbr": ["title"], - "acronym": ["title"], - **{tag: TABLE_ATTRIBUTES for tag in TABLE_TAGS}, + "a": {"href", "title"}, + "abbr": {"title"}, + "acronym": {"title"}, + **ALLOWED_TABLE_ATTRIBUTES, } @@ -108,7 +109,8 @@ class EmailNotification(BaseNotification): # pylint: disable=too-few-public-met } # Strip any malicious HTML from the description - description = bleach.clean( + # pylint: disable=no-member + description = nh3.clean( self._content.description or "", tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES, @@ -117,12 +119,13 @@ class EmailNotification(BaseNotification): # pylint: disable=too-few-public-met # Strip malicious HTML from embedded data, allowing only table elements if self._content.embedded_data is not None: df = self._content.embedded_data - html_table = bleach.clean( + # pylint: disable=no-member + html_table = nh3.clean( df.to_html(na_rep="", index=True, escape=True), # pandas will escape the HTML in cells already, so passing # more allowed tags here will not work tags=TABLE_TAGS, - attributes=TABLE_ATTRIBUTES, + attributes=ALLOWED_TABLE_ATTRIBUTES, ) else: html_table = "" diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py index 71559aaa3d..b6c608948d 100644 --- a/superset/utils/async_query_manager.py +++ b/superset/utils/async_query_manager.py @@ -192,5 +192,5 @@ class AsyncQueryManager: logger.debug("********** logging event data to stream %s", scoped_stream_name) logger.debug(event_data) - self._redis.xadd(scoped_stream_name, event_data, "*", self._stream_limit) - self._redis.xadd(full_stream_name, event_data, "*", self._stream_limit_firehose) + self._redis.xadd(scoped_stream_name, event_data, "*", self._stream_limit) # type: ignore + self._redis.xadd(full_stream_name, event_data, "*", self._stream_limit_firehose) # type: ignore diff --git a/superset/utils/core.py b/superset/utils/core.py index 109d6742b1..84a4bde9b3 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -70,8 +70,8 @@ from typing import ( from urllib.parse import unquote_plus from zipfile import ZipFile -import bleach import markdown as md +import nh3 import numpy as np import pandas as pd import sqlalchemy as sa @@ -664,7 +664,7 @@ def error_msg_from_exception(ex: Exception) -> str: def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str: - safe_markdown_tags = [ + safe_markdown_tags = { "h1", "h2", "h3", @@ -690,10 +690,10 @@ def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str: "dt", "img", "a", - ] + } safe_markdown_attrs = { - "img": ["src", "alt", "title"], - "a": ["href", "alt", "title"], + "img": {"src", "alt", "title"}, + "a": {"href", "alt", "title"}, } safe = md.markdown( raw or "", @@ -703,7 +703,8 @@ def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str: "markdown.extensions.codehilite", ], ) - safe = bleach.clean(safe, safe_markdown_tags, safe_markdown_attrs) + # pylint: disable=no-member + safe = nh3.clean(safe, tags=safe_markdown_tags, attributes=safe_markdown_attrs) if markup_wrap: safe = Markup(safe) return safe diff --git a/tests/unit_tests/notifications/email_tests.py b/tests/unit_tests/notifications/email_tests.py index 4ce34b99ca..697a9bac40 100644 --- a/tests/unit_tests/notifications/email_tests.py +++ b/tests/unit_tests/notifications/email_tests.py @@ -50,5 +50,8 @@ def test_render_description_with_html() -> None: ._get_content() .body ) - assert '<p>This is <a href="#">a test</a> alert</p><br>' in email_body + assert ( + '<p>This is <a href="#" rel="noopener noreferrer">a test</a> alert</p><br>' + in email_body + ) assert '<td><a href="http://www.example.com">333</a></td>' in email_body
