This is an automated email from the ASF dual-hosted git repository. elizabeth pushed a commit to branch 2.1 in repository https://gitbox.apache.org/repos/asf/superset.git
commit 7c23cb0b3fd224c320b35f05e74b572033569154 Author: Daniel Vaz Gaspar <[email protected]> AuthorDate: Tue Oct 17 18:28:09 2023 +0100 fix: improve upload ZIP file validation (#25658) --- superset/commands/importers/v1/utils.py | 2 + superset/config.py | 5 + superset/utils/core.py | 19 ++++ tests/unit_tests/utils/test_core.py | 179 +++++++++++++++++++++++++++++++- 4 files changed, 203 insertions(+), 2 deletions(-) diff --git a/superset/commands/importers/v1/utils.py b/superset/commands/importers/v1/utils.py index 1e73886682..3df5fbf821 100644 --- a/superset/commands/importers/v1/utils.py +++ b/superset/commands/importers/v1/utils.py @@ -25,6 +25,7 @@ from marshmallow.exceptions import ValidationError from superset import db from superset.commands.importers.exceptions import IncorrectVersionError from superset.models.core import Database +from superset.utils.core import check_is_safe_zip METADATA_FILE_NAME = "metadata.yaml" IMPORT_VERSION = "1.0.0" @@ -147,6 +148,7 @@ def is_valid_config(file_name: str) -> bool: def get_contents_from_bundle(bundle: ZipFile) -> Dict[str, str]: + check_is_safe_zip(bundle) return { remove_root(file_name): bundle.read(file_name).decode() for file_name in bundle.namelist() diff --git a/superset/config.py b/superset/config.py index c089416505..f2d9fa5adf 100644 --- a/superset/config.py +++ b/superset/config.py @@ -1530,6 +1530,11 @@ WELCOME_PAGE_LAST_TAB: Union[ Literal["examples", "all"], Tuple[str, List[Dict[str, Any]]] ] = "all" +# Max allowed size for a zipped file +ZIPPED_FILE_MAX_SIZE = 100 * 1024 * 1024 # 100MB +# Max allowed compression ratio for a zipped file +ZIP_FILE_MAX_COMPRESS_RATIO = 200.0 + # Configuration for environment tag shown on the navbar. Setting 'text' to '' will hide the tag. # 'color' can either be a hex color code, or a dot-indexed theme color (e.g. 
error.base) ENVIRONMENT_TAG_CONFIG = { diff --git a/superset/utils/core.py b/superset/utils/core.py index 8cf73b84aa..517ca6e21b 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -1998,6 +1998,25 @@ def create_zip(files: Dict[str, Any]) -> BytesIO: return buf +def check_is_safe_zip(zip_file: ZipFile) -> None: + """ + Checks whether a ZIP file is safe, raises SupersetException if not. + + :param zip_file: + :return: + """ + uncompress_size = 0 + compress_size = 0 + for zip_file_element in zip_file.infolist(): + if zip_file_element.file_size > current_app.config["ZIPPED_FILE_MAX_SIZE"]: + raise SupersetException("Found file with size above allowed threshold") + uncompress_size += zip_file_element.file_size + compress_size += zip_file_element.compress_size + compress_ratio = uncompress_size / compress_size + if compress_ratio > current_app.config["ZIP_FILE_MAX_COMPRESS_RATIO"]: + raise SupersetException("Zip compress ratio above allowed threshold") + + def remove_extra_adhoc_filters(form_data: Dict[str, Any]) -> None: """ Remove filters from slice data that originate from a filter box or native filter diff --git a/tests/unit_tests/utils/test_core.py b/tests/unit_tests/utils/test_core.py index 6845bb2fc1..d95b302c4f 100644 --- a/tests/unit_tests/utils/test_core.py +++ b/tests/unit_tests/utils/test_core.py @@ -15,11 +15,25 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-from typing import Any, Dict +import os +from dataclasses import dataclass +from typing import Any, Dict, Optional +from unittest.mock import MagicMock +import pandas as pd import pytest -from superset.utils.core import QueryObjectFilterClause, remove_extra_adhoc_filters +from superset.exceptions import SupersetException +from superset.utils.core import ( + cast_to_boolean, + check_is_safe_zip, + DateColumn, + is_test, + normalize_dttm_col, + parse_boolean_string, + QueryObjectFilterClause, + remove_extra_adhoc_filters, +) ADHOC_FILTER: QueryObjectFilterClause = { "col": "foo", @@ -35,6 +49,12 @@ EXTRA_FILTER: QueryObjectFilterClause = { } +@dataclass +class MockZipInfo: + file_size: int + compress_size: int + + @pytest.mark.parametrize( "original,expected", [ @@ -84,3 +104,158 @@ def test_remove_extra_adhoc_filters( ) -> None: remove_extra_adhoc_filters(original) assert expected == original + + +def test_is_test() -> None: + orig_value = os.getenv("SUPERSET_TESTENV") + + os.environ["SUPERSET_TESTENV"] = "true" + assert is_test() + os.environ["SUPERSET_TESTENV"] = "false" + assert not is_test() + os.environ["SUPERSET_TESTENV"] = "" + assert not is_test() + + if orig_value is not None: + os.environ["SUPERSET_TESTENV"] = orig_value + + +@pytest.mark.parametrize( + "test_input,expected", + [ + ("y", True), + ("Y", True), + ("yes", True), + ("True", True), + ("t", True), + ("true", True), + ("On", True), + ("on", True), + ("1", True), + ("n", False), + ("N", False), + ("no", False), + ("False", False), + ("f", False), + ("false", False), + ("Off", False), + ("off", False), + ("0", False), + ("foo", False), + (None, False), + ], +) +def test_parse_boolean_string(test_input: Optional[str], expected: bool) -> None: + assert parse_boolean_string(test_input) == expected + + +def test_int_values() -> None: + assert cast_to_boolean(1) is True + assert cast_to_boolean(0) is False + assert cast_to_boolean(-1) is True + assert cast_to_boolean(42) is True + assert cast_to_boolean(0) is
False + + +def test_float_values() -> None: + assert cast_to_boolean(0.5) is True + assert cast_to_boolean(3.14) is True + assert cast_to_boolean(-2.71) is True + assert cast_to_boolean(0.0) is False + + +def test_string_values() -> None: + assert cast_to_boolean("true") is True + assert cast_to_boolean("TruE") is True + assert cast_to_boolean("false") is False + assert cast_to_boolean("FaLsE") is False + assert cast_to_boolean("") is False + + +def test_none_value() -> None: + assert cast_to_boolean(None) is None + + +def test_boolean_values() -> None: + assert cast_to_boolean(True) is True + assert cast_to_boolean(False) is False + + +def test_other_values() -> None: + assert cast_to_boolean([]) is False + assert cast_to_boolean({}) is False + assert cast_to_boolean(object()) is False + + +def test_normalize_dttm_col() -> None: + """ + Tests for the ``normalize_dttm_col`` function. + + In particular, this covers a regression when Pandas was upgraded from 1.5.3 to + 2.0.3 and the behavior of ``pd.to_datetime`` changed. 
+ """ + df = pd.DataFrame({"__time": ["2017-07-01T00:00:00.000Z"]}) + assert ( + df.to_markdown() + == """ +| | __time | +|---:|:-------------------------| +| 0 | 2017-07-01T00:00:00.000Z | + """.strip() + ) + + # in 1.5.3 this would return a datetime64[ns] dtype, but in 2.0.3 we had to + # add ``exact=False`` since there is a leftover after parsing the format + dttm_cols = (DateColumn("__time", "%Y-%m-%d"),) + + # the function modifies the dataframe in place + normalize_dttm_col(df, dttm_cols) + + assert df["__time"].astype(str).tolist() == ["2017-07-01"] + + +def test_check_if_safe_zip_success(app_context: None) -> None: + """ + Test if ZIP files are safe + """ + ZipFile = MagicMock() + ZipFile.infolist.return_value = [ + MockZipInfo(file_size=1000, compress_size=10), + MockZipInfo(file_size=1000, compress_size=10), + MockZipInfo(file_size=1000, compress_size=10), + MockZipInfo(file_size=1000, compress_size=10), + MockZipInfo(file_size=1000, compress_size=10), + ] + check_is_safe_zip(ZipFile) + + +def test_check_if_safe_zip_high_rate(app_context: None) -> None: + """ + Test if ZIP files is not highly compressed + """ + ZipFile = MagicMock() + ZipFile.infolist.return_value = [ + MockZipInfo(file_size=1000, compress_size=1), + MockZipInfo(file_size=1000, compress_size=1), + MockZipInfo(file_size=1000, compress_size=1), + MockZipInfo(file_size=1000, compress_size=1), + MockZipInfo(file_size=1000, compress_size=1), + ] + with pytest.raises(SupersetException): + check_is_safe_zip(ZipFile) + + +def test_check_if_safe_zip_hidden_bomb(app_context: None) -> None: + """ + Test if ZIP file does not contain a big file highly compressed + """ + ZipFile = MagicMock() + ZipFile.infolist.return_value = [ + MockZipInfo(file_size=1000, compress_size=100), + MockZipInfo(file_size=1000, compress_size=100), + MockZipInfo(file_size=1000, compress_size=100), + MockZipInfo(file_size=1000, compress_size=100), + MockZipInfo(file_size=1000 * (1024 * 1024), compress_size=100), + ] + with 
pytest.raises(SupersetException): + check_is_safe_zip(ZipFile)
