This is an automated email from the ASF dual-hosted git repository. vavila pushed a commit to branch feat/masked-encrypted-extra-import-export-backend in repository https://gitbox.apache.org/repos/asf/superset.git
commit df23c4a15199e59909968f41ff2c9ed9a69f1501 Author: Vitor Avila <[email protected]> AuthorDate: Wed Feb 18 16:53:16 2026 -0300 feat: support for import/export masked_encrypted_extra (backend) --- superset/commands/database/export.py | 9 ++ superset/commands/database/importers/v1/utils.py | 24 +++- superset/commands/importers/v1/__init__.py | 4 + superset/commands/importers/v1/assets.py | 4 + superset/commands/importers/v1/utils.py | 19 ++++ superset/databases/api.py | 14 +++ superset/databases/schemas.py | 57 ++++++++++ superset/importexport/api.py | 13 +++ superset/utils/json.py | 42 +++++++ tests/integration_tests/databases/api_tests.py | 91 +++++++++++++++ tests/integration_tests/fixtures/importexport.py | 26 +++++ tests/unit_tests/databases/api_test.py | 70 ++++++++++++ .../databases/commands/importers/v1/import_test.py | 123 +++++++++++++++++++++ tests/unit_tests/databases/schema_tests.py | 106 ++++++++++++++++++ tests/unit_tests/importexport/api_test.py | 52 +++++++++ tests/unit_tests/utils/json_tests.py | 115 +++++++++++++++++++ 16 files changed, 767 insertions(+), 2 deletions(-) diff --git a/superset/commands/database/export.py b/superset/commands/database/export.py index 56929155cd3..9d087d8f3e0 100644 --- a/superset/commands/database/export.py +++ b/superset/commands/database/export.py @@ -97,6 +97,15 @@ class ExportDatabasesCommand(ExportModelsCommand): ) payload["ssh_tunnel"] = mask_password_info(ssh_tunnel_payload) + # If DB has sensitive fields in Secure Extra, export them masked. + # If not, export them as-is. 
+ if encrypted_extra := model.encrypted_extra: + masked_encrypted_extra = model.masked_encrypted_extra + if encrypted_extra != masked_encrypted_extra: + payload["masked_encrypted_extra"] = masked_encrypted_extra + else: + payload["encrypted_extra"] = encrypted_extra + payload["version"] = EXPORT_VERSION file_content = yaml.safe_dump(payload, sort_keys=False) diff --git a/superset/commands/database/importers/v1/utils.py b/superset/commands/database/importers/v1/utils.py index a4c7d6cf13c..6539a35b11c 100644 --- a/superset/commands/database/importers/v1/utils.py +++ b/superset/commands/database/importers/v1/utils.py @@ -26,7 +26,10 @@ from superset.commands.exceptions import ImportFailedError from superset.databases.ssh_tunnel.models import SSHTunnel from superset.databases.utils import make_url_safe from superset.db_engine_specs.exceptions import SupersetDBAPIConnectionError -from superset.exceptions import SupersetSecurityException +from superset.exceptions import ( + OAuth2RedirectError, + SupersetSecurityException, +) from superset.models.core import Database from superset.security.analytics_db_safety import check_sqlalchemy_uri from superset.utils import json @@ -74,6 +77,23 @@ def import_database( # noqa: C901 # TODO (betodealmeida): move this logic to import_from_dict config["extra"] = json.dumps(config["extra"]) + # Convert masked_encrypted_extra → encrypted_extra before importing. + # For existing DBs, reveal masked sensitive values from current encrypted_extra. + # For new DBs, schema validation already ensured no fields are still masked. 
+ if masked_encrypted_extra := config.pop("masked_encrypted_extra", None): + if existing and existing.encrypted_extra: + old_config = json.loads(existing.encrypted_extra) + new_config = json.loads(masked_encrypted_extra) + sensitive_fields = existing.db_engine_spec.encrypted_extra_sensitive_fields + revealed = json.reveal_sensitive( + old_config, + new_config, + sensitive_fields, + ) + config["encrypted_extra"] = json.dumps(revealed) + else: + config["encrypted_extra"] = masked_encrypted_extra + ssh_tunnel_config = config.pop("ssh_tunnel", None) # set SQLAlchemy URI via `set_sqlalchemy_uri` so that the password gets masked @@ -94,7 +114,7 @@ def import_database( # noqa: C901 try: add_permissions(database) - except SupersetDBAPIConnectionError as ex: + except (SupersetDBAPIConnectionError, OAuth2RedirectError) as ex: logger.warning(ex.message) return database diff --git a/superset/commands/importers/v1/__init__.py b/superset/commands/importers/v1/__init__.py index f2f314479b2..d8d01040876 100644 --- a/superset/commands/importers/v1/__init__.py +++ b/superset/commands/importers/v1/__init__.py @@ -63,6 +63,9 @@ class ImportModelsCommand(BaseCommand): self.ssh_tunnel_priv_key_passwords: dict[str, str] = ( kwargs.get("ssh_tunnel_priv_key_passwords") or {} ) + self.encrypted_extra_secrets: dict[str, dict[str, str]] = ( + kwargs.get("encrypted_extra_secrets") or {} + ) self.overwrite: bool = kwargs.get("overwrite", False) self._configs: dict[str, Any] = {} @@ -111,6 +114,7 @@ class ImportModelsCommand(BaseCommand): self.ssh_tunnel_passwords, self.ssh_tunnel_private_keys, self.ssh_tunnel_priv_key_passwords, + self.encrypted_extra_secrets, ) self._prevent_overwrite_existing_model(exceptions) diff --git a/superset/commands/importers/v1/assets.py b/superset/commands/importers/v1/assets.py index 3b0442157f1..d8155abf558 100644 --- a/superset/commands/importers/v1/assets.py +++ b/superset/commands/importers/v1/assets.py @@ -82,6 +82,9 @@ class ImportAssetsCommand(BaseCommand): 
self.ssh_tunnel_priv_key_passwords: dict[str, str] = ( kwargs.get("ssh_tunnel_priv_key_passwords") or {} ) + self.encrypted_extra_secrets: dict[str, dict[str, str]] = ( + kwargs.get("encrypted_extra_secrets") or {} + ) self._configs: dict[str, Any] = {} self.sparse = kwargs.get("sparse", False) @@ -199,6 +202,7 @@ class ImportAssetsCommand(BaseCommand): self.ssh_tunnel_passwords, self.ssh_tunnel_private_keys, self.ssh_tunnel_priv_key_passwords, + self.encrypted_extra_secrets, ) if exceptions: diff --git a/superset/commands/importers/v1/utils.py b/superset/commands/importers/v1/utils.py index 3aeb586997c..6e5bd93b440 100644 --- a/superset/commands/importers/v1/utils.py +++ b/superset/commands/importers/v1/utils.py @@ -31,6 +31,7 @@ from superset.extensions import feature_flag_manager from superset.models.core import Database from superset.models.dashboard import dashboard_slices from superset.tags.models import Tag, TaggedObject +from superset.utils import json from superset.utils.core import check_is_safe_zip from superset.utils.decorators import transaction @@ -111,6 +112,7 @@ def load_configs( ssh_tunnel_passwords: dict[str, str], ssh_tunnel_private_keys: dict[str, str], ssh_tunnel_priv_key_passwords: dict[str, str], + encrypted_extra_secrets: dict[str, dict[str, str]], ) -> dict[str, Any]: configs: dict[str, Any] = {} @@ -191,6 +193,23 @@ def load_configs( db_ssh_tunnel_priv_key_passws[config["uuid"]] ) + # populate encrypted_extra secrets from the request + # The secrets dict maps JSONPath -> value + # e.g., {"$.oauth2_client_info.secret": "actual_value"} + if file_name in encrypted_extra_secrets and config.get( + "masked_encrypted_extra" + ): + # Normalize escape sequences (needed for PEM keys/certs) + normalized_secrets = { + path: value.replace("\\n", "\n") + if isinstance(value, str) + else value + for path, value in encrypted_extra_secrets[file_name].items() + } + temp_dict = json.loads(config["masked_encrypted_extra"]) + temp_dict = 
json.set_masked_fields(temp_dict, normalized_secrets) + config["masked_encrypted_extra"] = json.dumps(temp_dict) + # Normalize example data URLs before schema validation if prefix == "datasets" and "data" in config: from superset.examples.helpers import normalize_example_data_url diff --git a/superset/databases/api.py b/superset/databases/api.py index e9178d1f24e..c46dd9ea844 100644 --- a/superset/databases/api.py +++ b/superset/databases/api.py @@ -1591,6 +1591,14 @@ class DatabaseRestApi(BaseSupersetModelRestApi): the private_key should be provided in the following format: `{"databases/MyDatabase.yaml": "my_private_key_password"}`. type: string + encrypted_extra_secrets: + description: >- + JSON map of secret values for masked_encrypted_extra fields. + Keys are file paths (e.g., "databases/db.yaml") and values + are maps of JSONPath expressions to secret values + (e.g., `{"databases/MyDatabase.yaml": + {"$.credentials_info.private_key": "actual_key"}}`). + type: string responses: 200: description: Database import result @@ -1642,6 +1650,11 @@ class DatabaseRestApi(BaseSupersetModelRestApi): if "ssh_tunnel_private_key_passwords" in request.form else None ) + encrypted_extra_secrets = ( + json.loads(request.form["encrypted_extra_secrets"]) + if "encrypted_extra_secrets" in request.form + else None + ) command = ImportDatabasesCommand( contents, @@ -1650,6 +1663,7 @@ class DatabaseRestApi(BaseSupersetModelRestApi): ssh_tunnel_passwords=ssh_tunnel_passwords, ssh_tunnel_private_keys=ssh_tunnel_private_keys, ssh_tunnel_priv_key_passwords=ssh_tunnel_priv_key_passwords, + encrypted_extra_secrets=encrypted_extra_secrets, ) command.run() return self.response(200, message="OK") diff --git a/superset/databases/schemas.py b/superset/databases/schemas.py index 199da14b63c..f292a72eba8 100644 --- a/superset/databases/schemas.py +++ b/superset/databases/schemas.py @@ -59,6 +59,7 @@ from superset.models.core import ConfigurationMethod, Database from 
superset.security.analytics_db_safety import check_sqlalchemy_uri from superset.utils import json from superset.utils.core import markdown, parse_ssl_cert +from superset.utils.json import get_masked_fields database_schemas_query_schema = { "type": "object", @@ -241,6 +242,15 @@ def encrypted_extra_validator(value: str | None) -> None: ) from ex +def masked_encrypted_extra_validator(value: str) -> None: + """ + Validate that `masked_encrypted_extra` is a valid non-empty JSON string + """ + if value == "{}": + raise ValidationError([_("Field cannot be empty.")]) + encrypted_extra_validator(value) + + def extra_validator(value: str) -> str: """ Validate that extra is a valid JSON string, and that metadata_params @@ -874,6 +884,9 @@ class ImportV1DatabaseSchema(Schema): sqlalchemy_uri = fields.String(required=True) password = fields.String(allow_none=True) encrypted_extra = fields.String(allow_none=True, validate=encrypted_extra_validator) + masked_encrypted_extra = fields.String( + allow_none=False, validate=masked_encrypted_extra_validator + ) cache_timeout = fields.Integer(allow_none=True) expose_in_sqllab = fields.Boolean() allow_run_async = fields.Boolean() @@ -971,6 +984,50 @@ class ImportV1DatabaseSchema(Schema): raise ValidationError(exception_messages) return + @validates_schema + def validate_masked_encrypted_extra( + self, data: dict[str, Any], **kwargs: Any + ) -> None: + if "masked_encrypted_extra" not in data: + return + + if "encrypted_extra" in data: + raise ValidationError( + "File contains both `encrypted_extra` and `masked_encrypted_extra`" + ) + + if db.session.query(Database).filter_by(uuid=data["uuid"]).first(): + # Existing DB: sensitive values will be revealed from existing + # encrypted_extra in import_database() + return + + masked_encrypted_extra = json.loads(data["masked_encrypted_extra"]) + + # Determine engine spec from sqlalchemy_uri to get sensitive fields + sqlalchemy_uri = data["sqlalchemy_uri"] + url = make_url_safe(sqlalchemy_uri) + 
backend = url.get_backend_name() + driver = url.get_driver_name() + db_engine_spec = get_engine_spec(backend, driver=driver) + + # Check if any sensitive field is still masked + masked_fields = get_masked_fields( + masked_encrypted_extra, + db_engine_spec.encrypted_extra_sensitive_fields, + ) + + if masked_fields: + labels = db_engine_spec.encrypted_extra_sensitive_field_labels + raise ValidationError( + { + "_schema": [ + f"Must provide value for masked_encrypted_extra field: {field}" + + (f" ({labels[field]})" if field in labels else "") + for field in masked_fields + ] + } + ) + def encrypted_field_properties(self, field: Any, **_) -> dict[str, Any]: # type: ignore ret = {} diff --git a/superset/importexport/api.py b/superset/importexport/api.py index 0a6a27dd58d..09da5c0ff70 100644 --- a/superset/importexport/api.py +++ b/superset/importexport/api.py @@ -147,6 +147,13 @@ class ImportExportRestApi(BaseSupersetApi): the private_key should be provided in the following format: `{"databases/MyDatabase.yaml": "my_private_key_password"}`. type: string + encrypted_extra_secrets: + description: >- + JSON map of secret values for masked encrypted_extra fields. + Each key is a database file path and the value is a map of + JSONPath expressions to secret values. For example: + `{"databases/db.yaml": {"$.credentials_info.secret": "foo"}}`. 
+ type: string sparse: description: allow sparse update of resources type: boolean @@ -202,6 +209,11 @@ class ImportExportRestApi(BaseSupersetApi): if "ssh_tunnel_private_key_passwords" in request.form else None ) + encrypted_extra_secrets = ( + json.loads(request.form["encrypted_extra_secrets"]) + if "encrypted_extra_secrets" in request.form + else None + ) command = ImportAssetsCommand( contents, @@ -210,6 +222,7 @@ class ImportExportRestApi(BaseSupersetApi): ssh_tunnel_passwords=ssh_tunnel_passwords, ssh_tunnel_private_keys=ssh_tunnel_private_keys, ssh_tunnel_priv_key_passwords=ssh_tunnel_priv_key_passwords, + encrypted_extra_secrets=encrypted_extra_secrets, ) command.run() return self.response(200, message="OK") diff --git a/superset/utils/json.py b/superset/utils/json.py index 689b0af6def..3a9bdbd15f2 100644 --- a/superset/utils/json.py +++ b/superset/utils/json.py @@ -302,3 +302,45 @@ def reveal_sensitive( match.context.value[match.path.fields[0]] = old_value[0].value return revealed_payload + + +def get_masked_fields( + payload: dict[str, Any], + sensitive_fields: set[str], +) -> list[str]: + """ + Returns masked fields in JSON config. + + :param payload: The payload to check + :param sensitive_fields: The set of fields to check, as JSONPath expressions + :returns: List of JSONPath expressions for fields that are masked + """ + masked = [] + for json_path in sensitive_fields: + jsonpath_expr = parse(json_path) + for match in jsonpath_expr.find(payload): + if match.value == PASSWORD_MASK: + masked.append(json_path) + break + return masked + + +def set_masked_fields( + payload: dict[str, Any], + path_values: dict[str, Any], +) -> dict[str, Any]: + """ + Sets values at JSONPath locations in a payload. 
+ + :param payload: The payload to modify + :param path_values: A dict mapping JSONPath expressions to values + :returns: The modified payload (copy) + """ + result = copy.deepcopy(payload) + + for json_path, value in path_values.items(): + jsonpath_expr = parse(json_path) + for match in jsonpath_expr.find(result): + match.context.value[match.path.fields[0]] = value + + return result diff --git a/tests/integration_tests/databases/api_tests.py b/tests/integration_tests/databases/api_tests.py index f77a47c8a33..632d15673fa 100644 --- a/tests/integration_tests/databases/api_tests.py +++ b/tests/integration_tests/databases/api_tests.py @@ -67,6 +67,7 @@ from tests.integration_tests.fixtures.world_bank_dashboard import ( ) from tests.integration_tests.fixtures.importexport import ( database_config, + database_config_with_masked_encrypted_extra, dataset_config, database_with_ssh_tunnel_config_password, database_with_ssh_tunnel_config_private_key, @@ -3112,6 +3113,96 @@ class TestDatabaseApi(SupersetTestCase): db.session.delete(database) db.session.commit() + @mock.patch("superset.commands.database.importers.v1.utils.add_permissions") + def test_import_database_masked_encrypted_extra_missing_field( + self, mock_add_permissions + ): + """ + Database API: Test import database with masked_encrypted_extra containing + PASSWORD_MASK values for a new DB returns a validation error listing the + fields that need real values. 
+ """ + self.login(ADMIN_USERNAME) + uri = "api/v1/database/import/" + + masked_config = database_config_with_masked_encrypted_extra.copy() + masked_config["masked_encrypted_extra"] = json.dumps( + { + "credentials_info": { + "type": "service_account", + "project_id": "test-project", + "private_key": "XXXXXXXXXX", + } + } + ) + # Use a UUID that doesn't exist in the DB so it's treated as a new database + masked_config["uuid"] = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" + + buf = self.create_import_v1_zip_file("database", databases=[masked_config]) + form_data = { + "formData": (buf, "database_export.zip"), + } + rv = self.client.post(uri, data=form_data, content_type="multipart/form-data") + response = json.loads(rv.data.decode("utf-8")) + + assert rv.status_code == 422 + assert len(response["errors"]) == 1 + error = response["errors"][0] + assert error["error_type"] == "GENERIC_COMMAND_ERROR" + assert "Must provide value for masked_encrypted_extra field" in str( + error["extra"] + ) + assert "$.credentials_info.private_key" in str(error["extra"]) + + @mock.patch("superset.commands.database.importers.v1.utils.add_permissions") + def test_import_database_with_encrypted_extra_secrets(self, mock_add_permissions): + """ + Database API: Test import database with encrypted_extra_secrets in form data. + The secrets should replace PASSWORD_MASK values in the config before import. 
+ """ + self.login(ADMIN_USERNAME) + uri = "api/v1/database/import/" + + masked_config = database_config_with_masked_encrypted_extra.copy() + masked_config["masked_encrypted_extra"] = json.dumps( + { + "credentials_info": { + "type": "service_account", + "project_id": "test-project", + "private_key": "XXXXXXXXXX", + } + } + ) + + buf = self.create_import_v1_zip_file("database", databases=[masked_config]) + form_data = { + "formData": (buf, "database_export.zip"), + "encrypted_extra_secrets": json.dumps( + { + "databases/database_1.yaml": { + "$.credentials_info.private_key": "-----BEGIN PRIVATE KEY-----\\nREAL_KEY\\n-----END PRIVATE KEY-----\\n", # noqa: E501 + } + } + ), + } + rv = self.client.post(uri, data=form_data, content_type="multipart/form-data") + response = json.loads(rv.data.decode("utf-8")) + + assert rv.status_code == 200 + assert response == {"message": "OK"} + + database = ( + db.session.query(Database).filter_by(uuid=masked_config["uuid"]).one() + ) + assert database.encrypted_extra is not None + encrypted = json.loads(database.encrypted_extra) + assert encrypted["credentials_info"]["private_key"] == ( + "-----BEGIN PRIVATE KEY-----\nREAL_KEY\n-----END PRIVATE KEY-----\n" + ) + + db.session.delete(database) + db.session.commit() + @mock.patch( "superset.db_engine_specs.base.BaseEngineSpec.get_function_names", ) diff --git a/tests/integration_tests/fixtures/importexport.py b/tests/integration_tests/fixtures/importexport.py index 0cd63526e8c..7540026c863 100644 --- a/tests/integration_tests/fixtures/importexport.py +++ b/tests/integration_tests/fixtures/importexport.py @@ -17,6 +17,8 @@ from copy import deepcopy from typing import Any +from superset.utils import json + # example V0 import/export format dataset_ui_export: list[dict[str, Any]] = [ { @@ -399,6 +401,30 @@ database_config_no_creds: dict[str, Any] = { "version": "1.0.0", } +database_config_with_masked_encrypted_extra: dict[str, Any] = { + "allow_csv_upload": False, + "allow_ctas": False, + 
"allow_cvas": False, + "allow_dml": False, + "allow_run_async": False, + "cache_timeout": None, + "database_name": "imported_database_encrypted", + "expose_in_sqllab": True, + "extra": {}, + "sqlalchemy_uri": "bigquery://test-project/", + "uuid": "c9a1dde3-889e-5bc0-9ae9-0bc229e8fa90", + "masked_encrypted_extra": json.dumps( + { + "credentials_info": { + "type": "service_account", + "project_id": "test-project", + "private_key": "-----BEGIN PRIVATE KEY-----\nMyPriVaTeKeY\n-----END PRIVATE KEY-----\n", # noqa: E501 + } + } + ), + "version": "1.0.0", +} + database_with_ssh_tunnel_config_private_key: dict[str, Any] = { "allow_csv_upload": True, "allow_ctas": True, diff --git a/tests/unit_tests/databases/api_test.py b/tests/unit_tests/databases/api_test.py index 244d75f7e28..d907b02bf9f 100644 --- a/tests/unit_tests/databases/api_test.py +++ b/tests/unit_tests/databases/api_test.py @@ -451,6 +451,76 @@ def test_import( ssh_tunnel_passwords=None, ssh_tunnel_private_keys=None, ssh_tunnel_priv_key_passwords=None, + encrypted_extra_secrets=None, + ) + + +def test_import_with_encrypted_extra_secrets( + mocker: MockerFixture, + client: Any, + full_api_access: None, +) -> None: + """ + Test that encrypted_extra_secrets are passed to ImportDatabasesCommand. 
+ """ + contents = { + "metadata.yaml": yaml.safe_dump( + { + "version": "1.0.0", + "type": "Database", + "timestamp": "2021-01-01T00:00:00Z", + } + ), + "databases/test.yaml": yaml.safe_dump( + { + "database_name": "test", + "sqlalchemy_uri": "bigquery://gcp-project-id/", + "cache_timeout": 0, + "expose_in_sqllab": True, + "allow_run_async": False, + "allow_ctas": False, + "allow_cvas": False, + "allow_dml": False, + "allow_file_upload": False, + "masked_encrypted_extra": json.dumps( + {"credentials_info": {"private_key": "XXXXXXXXXX"}} + ), + "extra": json.dumps({"allows_virtual_table_explore": True}), + "uuid": "00000000-0000-0000-0000-123456789001", + } + ), + } + mocker.patch("superset.databases.api.is_zipfile", return_value=True) + mocker.patch("superset.databases.api.ZipFile") + mocker.patch( + "superset.databases.api.get_contents_from_bundle", + return_value=contents, + ) + command = mocker.patch("superset.databases.api.ImportDatabasesCommand") + + secrets = { + "databases/test.yaml": { + "$.credentials_info.private_key": "-----BEGIN PRIVATE KEY-----" + } + } + form_data = { + "formData": (BytesIO(b"test"), "test.zip"), + "encrypted_extra_secrets": json.dumps(secrets), + } + client.post( + "/api/v1/database/import/", + data=form_data, + content_type="multipart/form-data", + ) + + command.assert_called_with( + contents, + passwords=None, + overwrite=False, + ssh_tunnel_passwords=None, + ssh_tunnel_private_keys=None, + ssh_tunnel_priv_key_passwords=None, + encrypted_extra_secrets=secrets, ) diff --git a/tests/unit_tests/databases/commands/importers/v1/import_test.py b/tests/unit_tests/databases/commands/importers/v1/import_test.py index 7d58da170a9..90ca9584aba 100644 --- a/tests/unit_tests/databases/commands/importers/v1/import_test.py +++ b/tests/unit_tests/databases/commands/importers/v1/import_test.py @@ -250,3 +250,126 @@ def test_import_database_with_user_impersonation( database = import_database(config) assert database.impersonate_user is True + + +def 
test_import_database_with_masked_encrypted_extra_new_db( + mocker: MockerFixture, + session: Session, +) -> None: + """ + Test importing a new database with masked_encrypted_extra. + + When no existing DB matches the UUID, the masked_encrypted_extra value + should be stored as-is in encrypted_extra. + """ + from superset import security_manager + from superset.commands.database.importers.v1.utils import import_database + from superset.models.core import Database + from tests.integration_tests.fixtures.importexport import ( + database_config_with_masked_encrypted_extra, + ) + + mocker.patch.object(security_manager, "can_access", return_value=True) + mocker.patch("superset.commands.database.importers.v1.utils.add_permissions") + + engine = db.session.get_bind() + Database.metadata.create_all(engine) # pylint: disable=no-member + + config = copy.deepcopy(database_config_with_masked_encrypted_extra) + database = import_database(config) + + assert database.database_name == "imported_database_encrypted" + # masked_encrypted_extra should be stored as encrypted_extra + assert database.encrypted_extra is not None + encrypted = json.loads(database.encrypted_extra) + assert encrypted["credentials_info"]["type"] == "service_account" + assert encrypted["credentials_info"]["project_id"] == "test-project" + assert encrypted["credentials_info"]["private_key"] == ( + "-----BEGIN PRIVATE KEY-----\nMyPriVaTeKeY\n-----END PRIVATE KEY-----\n" + ) + + +def test_import_database_with_masked_encrypted_extra_existing_db( + mocker: MockerFixture, + session: Session, +) -> None: + """ + Test importing over an existing database with masked_encrypted_extra. + + When the import carries PASSWORD_MASK values for sensitive fields and + an existing DB has the real values, reveal_sensitive should restore + the original values from the existing DB's encrypted_extra. 
+ """ + from superset import security_manager + from superset.commands.database.importers.v1.utils import import_database + from superset.constants import PASSWORD_MASK + from superset.models.core import Database + from tests.integration_tests.fixtures.importexport import ( + database_config_with_masked_encrypted_extra, + ) + + mocker.patch.object(security_manager, "can_access", return_value=True) + mocker.patch("superset.commands.database.importers.v1.utils.add_permissions") + + engine = db.session.get_bind() + Database.metadata.create_all(engine) # pylint: disable=no-member + + # First, create the existing database with real encrypted_extra + config = copy.deepcopy(database_config_with_masked_encrypted_extra) + import_database(config) + db.session.flush() + + # Now import again with masked values (simulating re-import) + config2 = copy.deepcopy(database_config_with_masked_encrypted_extra) + config2["masked_encrypted_extra"] = json.dumps( + { + "credentials_info": { + "type": "service_account", + "project_id": "updated-project", + "private_key": PASSWORD_MASK, + } + } + ) + database2 = import_database(config2, overwrite=True) + + # The masked private_key should be revealed from the existing DB + encrypted = json.loads(database2.encrypted_extra) + assert encrypted["credentials_info"]["project_id"] == "updated-project" + assert encrypted["credentials_info"]["private_key"] == ( + "-----BEGIN PRIVATE KEY-----\nMyPriVaTeKeY\n-----END PRIVATE KEY-----\n" + ) + assert encrypted["credentials_info"]["private_key"] != PASSWORD_MASK + + +def test_import_database_oauth2_redirect_is_nonfatal( + mocker: MockerFixture, + session: Session, +) -> None: + """ + Test that an OAuth2RedirectError from add_permissions is logged + and does not prevent the import from succeeding. 
+ """ + from superset import security_manager + from superset.commands.database.importers.v1.utils import import_database + from superset.exceptions import OAuth2RedirectError + from superset.models.core import Database + from tests.integration_tests.fixtures.importexport import database_config + + mocker.patch.object(security_manager, "can_access", return_value=True) + mock_add_perms = mocker.patch( + "superset.commands.database.importers.v1.utils.add_permissions", + side_effect=OAuth2RedirectError( + url="https://oauth.example.com/authorize", + tab_id="abc-123", + redirect_uri="https://superset.example.com/callback", + ), + ) + + engine = db.session.get_bind() + Database.metadata.create_all(engine) # pylint: disable=no-member + + config = copy.deepcopy(database_config) + database = import_database(config) + + assert database.database_name == "imported_database" + mock_add_perms.assert_called_once_with(database) diff --git a/tests/unit_tests/databases/schema_tests.py b/tests/unit_tests/databases/schema_tests.py index a61e6300077..1fd14343a8a 100644 --- a/tests/unit_tests/databases/schema_tests.py +++ b/tests/unit_tests/databases/schema_tests.py @@ -23,6 +23,8 @@ import pytest from marshmallow import fields, Schema, ValidationError from pytest_mock import MockerFixture +from superset.utils import json + if TYPE_CHECKING: from superset.databases.schemas import DatabaseParametersSchemaMixin @@ -61,6 +63,24 @@ def dummy_engine(mocker: MockerFixture) -> None: mocker.patch("superset.databases.schemas.get_engine_spec", return_value=DummyEngine) [email protected] +def mock_bq_engine(mocker: MockerFixture) -> None: + """ + Fixture providing a mocked BQ engine spec. 
+ """ + from superset.db_engine_specs.bigquery import BigQueryEngineSpec + + mock_url = mocker.MagicMock() + mock_url.get_backend_name.return_value = "bigquery" + mock_url.get_driver_name.return_value = "bigquery" + + mocker.patch("superset.databases.schemas.make_url_safe", return_value=mock_url) + mocker.patch( + "superset.databases.schemas.get_engine_spec", + return_value=BigQueryEngineSpec, + ) + + def test_database_parameters_schema_mixin( dummy_engine: None, dummy_schema: "Schema", @@ -272,3 +292,89 @@ def test_oauth2_schema_extra() -> None: } ) assert payload == {"code": "SECRET", "state": "12345"} + + +def test_import_schema_rejects_both_encrypted_and_masked() -> None: + """ + Test that ImportV1DatabaseSchema rejects configs with both + encrypted_extra and masked_encrypted_extra. + """ + from superset.databases.schemas import ImportV1DatabaseSchema + + schema = ImportV1DatabaseSchema() + config = { + "database_name": "test_db", + "sqlalchemy_uri": "bigquery://test/", + "uuid": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee", + "encrypted_extra": json.dumps({"secret": "value"}), + "masked_encrypted_extra": json.dumps({"secret": "XXXXXXXXXX"}), + "extra": {}, + "version": "1.0.0", + } + with pytest.raises(ValidationError) as exc_info: + schema.load(config) + assert "File contains both" in str(exc_info.value) + + +def test_import_schema_rejects_masked_fields_for_new_db( + mock_bq_engine: None, + mocker: MockerFixture, +) -> None: + """ + Test that ImportV1DatabaseSchema rejects configs with PASSWORD_MASK + values for a new DB (no existing UUID match). 
+ """ + from superset.databases.schemas import ImportV1DatabaseSchema + + mock_session = mocker.patch("superset.databases.schemas.db.session") + mock_session.query.return_value.filter_by.return_value.first.return_value = None + + schema = ImportV1DatabaseSchema() + config = { + "database_name": "test_db", + "sqlalchemy_uri": "bigquery://test/", + "uuid": "bbbbbbbb-aaaa-cccc-dddd-eeeeeeeeeeff", + "masked_encrypted_extra": json.dumps( + {"credentials_info": {"private_key": "XXXXXXXXXX"}} + ), + "extra": {}, + "version": "1.0.0", + } + with pytest.raises(ValidationError) as exc_info: + schema.load(config) + error_messages = str(exc_info.value) + assert "Must provide value for masked_encrypted_extra field" in error_messages + assert "$.credentials_info.private_key" in error_messages + + +def test_import_schema_allows_masked_fields_for_existing_db( + mock_bq_engine: None, + mocker: MockerFixture, +) -> None: + """ + Test that ImportV1DatabaseSchema allows PASSWORD_MASK values when + the DB already exists (UUID match). The reveal will happen later + in import_database(). 
+ """ + from superset.databases.schemas import ImportV1DatabaseSchema + + mock_session = mocker.patch("superset.databases.schemas.db.session") + mock_existing_db = mocker.MagicMock() + mock_session = mocker.patch("superset.databases.schemas.db.session") + mock_session.query.return_value.filter_by.return_value.first.return_value = ( + mock_existing_db + ) + + schema = ImportV1DatabaseSchema() + config = { + "database_name": "test_db", + "sqlalchemy_uri": "bigquery://test/", + "uuid": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee", + "masked_encrypted_extra": json.dumps( + {"credentials_info": {"private_key": "XXXXXXXXXX"}} + ), + "extra": {}, + "version": "1.0.0", + } + # Should not raise - masked values are allowed for existing DBs + schema.load(config) diff --git a/tests/unit_tests/importexport/api_test.py b/tests/unit_tests/importexport/api_test.py index 1458631e5d5..1ecfd94861c 100644 --- a/tests/unit_tests/importexport/api_test.py +++ b/tests/unit_tests/importexport/api_test.py @@ -113,6 +113,58 @@ def test_import_assets( ssh_tunnel_passwords=None, ssh_tunnel_private_keys=None, ssh_tunnel_priv_key_passwords=None, + encrypted_extra_secrets=None, + ) + + +def test_import_assets_with_encrypted_extra_secrets( + mocker: MockerFixture, + client: Any, + full_api_access: None, +) -> None: + """ + Test that encrypted_extra_secrets are passed to ImportAssetsCommand. 
+ """ + mocked_contents = { + "metadata.yaml": ( + "version: 1.0.0\ntype: assets\ntimestamp: '2022-01-01T00:00:00+00:00'\n" + ), + "databases/example.yaml": "<DATABASE CONTENTS>", + } + + ImportAssetsCommand = mocker.patch("superset.importexport.api.ImportAssetsCommand") # noqa: N806 + + root = Path("assets_export") + buf = BytesIO() + with ZipFile(buf, "w") as bundle: + for path, contents in mocked_contents.items(): + with bundle.open(str(root / path), "w") as fp: + fp.write(contents.encode()) + buf.seek(0) + + secrets = { + "assets_export/databases/example.yaml": { + "$.credentials_info.private_key": "-----BEGIN PRIVATE KEY-----\nKEY\n-----END PRIVATE KEY-----\n", # noqa: E501 + } + } + form_data = { + "bundle": (buf, "assets_export.zip"), + "encrypted_extra_secrets": json.dumps(secrets), + } + response = client.post( + "/api/v1/assets/import/", data=form_data, content_type="multipart/form-data" + ) + assert response.status_code == 200 + assert response.json == {"message": "OK"} + + ImportAssetsCommand.assert_called_with( + mocked_contents, + sparse=False, + passwords=None, + ssh_tunnel_passwords=None, + ssh_tunnel_private_keys=None, + ssh_tunnel_priv_key_passwords=None, + encrypted_extra_secrets=secrets, ) diff --git a/tests/unit_tests/utils/json_tests.py b/tests/unit_tests/utils/json_tests.py index 957ce5e251a..573f14f5aa4 100644 --- a/tests/unit_tests/utils/json_tests.py +++ b/tests/unit_tests/utils/json_tests.py @@ -19,6 +19,7 @@ import math import uuid from datetime import date, datetime, time, timedelta from decimal import Decimal +from typing import Any from unittest.mock import MagicMock import numpy as np @@ -26,6 +27,7 @@ import pandas as pd import pytest import pytz +from superset.constants import PASSWORD_MASK from superset.utils import json from superset.utils.core import ( zlib_compress, @@ -264,6 +266,119 @@ def test_json_int_dttm_ser(): json.json_int_dttm_ser(np.datetime64()) [email protected]( + "payload,path_values,expected_result", + [ + ( + { 
+ "credentials_info": { + "type": "service_account", + "private_key": "XXXXXXXXXX", + }, + }, + {"$.credentials_info.private_key": "NEW_KEY"}, + { + "credentials_info": { + "type": "service_account", + "private_key": "NEW_KEY", + }, + }, + ), + ( + { + "auth_params": { + "privatekey_body": "XXXXXXXXXX", + "privatekey_pass": "XXXXXXXXXX", + }, + "other": "value", + }, + { + "$.auth_params.privatekey_body": "-----BEGIN PRIVATE KEY-----", + "$.auth_params.privatekey_pass": "passphrase", + }, + { + "auth_params": { + "privatekey_body": "-----BEGIN PRIVATE KEY-----", + "privatekey_pass": "passphrase", + }, + "other": "value", + }, + ), + ( + {"existing": "value"}, + {"$.nonexistent.path": "new_value"}, + {"existing": "value"}, + ), + ], +) +def test_set_masked_fields( + payload: dict[str, Any], + path_values: dict[str, Any], + expected_result: dict[str, Any], +) -> None: + """ + Test setting a value at a JSONPath location. + """ + result = json.set_masked_fields(payload, path_values) + assert result == expected_result + + [email protected]( + "payload,sensitive_fields,expected_result", + [ + ( + { + "credentials_info": { + "type": "service_account", + "private_key": PASSWORD_MASK, + }, + }, + {"$.credentials_info.private_key", "$.credentials_info.type"}, + ["$.credentials_info.private_key"], + ), + ( + { + "credentials_info": { + "private_key": "ACTUAL_KEY", + }, + }, + {"$.credentials_info.private_key"}, + [], + ), + ( + { + "auth_params": { + "privatekey_body": PASSWORD_MASK, + "privatekey_pass": "actual_pass", + }, + "oauth2_client_info": { + "secret": PASSWORD_MASK, + }, + }, + { + "$.auth_params.privatekey_body", + "$.auth_params.privatekey_pass", + "$.oauth2_client_info.secret", + }, + [ + "$.auth_params.privatekey_body", + "$.oauth2_client_info.secret", + ], + ), + ], +) +def test_get_masked_fields( + payload: dict[str, Any], + sensitive_fields: set[str], + expected_result: dict[str, Any], +) -> None: + """ + Test that get_masked_fields returns paths where value 
equals PASSWORD_MASK. + """ + masked = json.get_masked_fields(payload, sensitive_fields) + assert masked == sorted(expected_result) + + def test_format_timedelta(): assert json.format_timedelta(timedelta(0)) == "0:00:00" assert json.format_timedelta(timedelta(days=1)) == "1 day, 0:00:00"
