This is an automated email from the ASF dual-hosted git repository. beto pushed a commit to branch folder-api in repository https://gitbox.apache.org/repos/asf/superset.git
commit b03aa11420515cb521979ea7dff619f310693a0f Author: Beto Dealmeida <[email protected]> AuthorDate: Mon Mar 3 15:48:57 2025 -0500 feat: dataset folders (backend) --- superset/commands/dataset/update.py | 74 +++++++- superset/connectors/sqla/models.py | 7 +- superset/datasets/api.py | 28 +-- superset/datasets/schemas.py | 15 +- ...25-03-03_20-52_94e7a3499973_add_folder_table.py | 42 +++++ tests/integration_tests/datasets/api_tests.py | 87 ++++++++- tests/unit_tests/commands/dataset/test_update.py | 195 ++++++++++++++++++++- 7 files changed, 427 insertions(+), 21 deletions(-) diff --git a/superset/commands/dataset/update.py b/superset/commands/dataset/update.py index 2772cc0ffa..a84248ca55 100644 --- a/superset/commands/dataset/update.py +++ b/superset/commands/dataset/update.py @@ -17,7 +17,7 @@ import logging from collections import Counter from functools import partial -from typing import Any, Optional +from typing import Any, cast, Optional from flask_appbuilder.models.sqla import Model from marshmallow import ValidationError @@ -39,8 +39,9 @@ from superset.commands.dataset.exceptions import ( DatasetNotFoundError, DatasetUpdateFailedError, ) -from superset.connectors.sqla.models import SqlaTable +from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn from superset.daos.dataset import DatasetDAO +from superset.datasets.schemas import FolderSchema from superset.exceptions import SupersetSecurityException from superset.sql_parse import Table from superset.utils.decorators import on_error, transaction @@ -127,16 +128,30 @@ class UpdateDatasetCommand(UpdateMixin, BaseCommand): except ValidationError as ex: exceptions.append(ex) - # Validate columns + self._validate_semantics(exceptions) + + if exceptions: + raise DatasetInvalidError(exceptions=exceptions) + + def _validate_semantics(self, exceptions: list[ValidationError]) -> None: + # we know we have a valid model + self._model = cast(SqlaTable, self._model) + if columns := self._properties.get("columns"): self._validate_columns(columns, exceptions) - # Validate metrics if metrics := self._properties.get("metrics"): self._validate_metrics(metrics, exceptions) - if exceptions: - raise DatasetInvalidError(exceptions=exceptions) + if folders := self._properties.get("folders"): + try: + validate_folders(folders, self._model.metrics, self._model.columns) + except ValidationError as ex: + exceptions.append(ex) + + # dump schema to convert UUID to string + schema = FolderSchema(many=True) + self._properties["folders"] = schema.dump(folders) def _validate_columns( self, columns: list[dict[str, Any]], exceptions: list[ValidationError] @@ -189,3 +204,50 @@ class UpdateDatasetCommand(UpdateMixin, BaseCommand): if count > 1 ] return duplicates + + +def validate_folders( + folders: list[FolderSchema], + metrics: list[SqlMetric], + columns: list[TableColumn], +) -> None: + """ + Additional folder validation. + + The marshmallow schema will validate the folder structure, but we still need to + check that UUIDs are valid, names are unique, and that there are no cycles. + """ + existing = { + "metric": {metric.uuid: metric.metric_name for metric in metrics}, + "column": {column.uuid: column.column_name for column in columns}, + } + + queue = folders.copy() + seen_uuids = set() + seen_names = set() + while queue: + obj = queue.pop(0) + uuid, name, type = obj["uuid"], obj["name"], obj["type"] + + if uuid in seen_uuids: + raise ValidationError("Found cycle in folder structure") + seen_uuids.add(uuid) + + if type == "folder" and name in seen_names: + raise ValidationError(f"Duplicate folder name: {name}") + seen_names.add(name) + + if type == "folder" and name.lower() in { + "metrics", + "columns", + }: + raise ValidationError(f"Folder cannot have name '{name}'") + + if type in {"metric", "column"}: + if uuid not in existing[type]: + raise ValidationError(f"Invalid UUID for {type} '{name}': {uuid}") + if name != existing[type][uuid]: + raise ValidationError(f"Mismatched name '{name}' for UUID '{uuid}'") + + if children := obj.get("children"): + queue.extend(children) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 6478fdf075..bbcf86d634 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -69,6 +69,7 @@ from sqlalchemy.sql import column, ColumnElement, literal_column, table from sqlalchemy.sql.elements import ColumnClause, TextClause from sqlalchemy.sql.expression import Label from sqlalchemy.sql.selectable import Alias, TableClause +from sqlalchemy.types import JSON from superset import app, db, is_feature_enabled, security_manager from superset.commands.dataset.exceptions import DatasetNotFoundError @@ -400,6 +401,7 @@ class BaseDatasource(AuditMixinNullable, ImportExportMixin): # pylint: disable= # one to many "columns": [o.data for o in self.columns], "metrics": [o.data for o in self.metrics], + "folders": self.folders, # TODO deprecate, move logic to JS "order_by_choices": self.order_by_choices, "owners": [owner.id for owner in self.owners], @@ -1018,6 +1020,7 @@ class TableColumn(AuditMixinNullable, ImportExportMixin, CertificationMixin, Mod "filterable", "groupby", "id", + "uuid", "is_certified", "is_dttm", "python_date_format", @@ -1065,7 +1068,7 @@ class SqlMetric(AuditMixinNullable, ImportExportMixin, CertificationMixin, Model "extra", "warning_text", ] - update_from_object_fields = list(s for s in export_fields if s != "table_id") # noqa: C400 + update_from_object_fields = [s for s in export_fields if s != "table_id"] export_parent = "table" def __repr__(self) -> str: @@ -1117,6 +1120,7 @@ class SqlMetric(AuditMixinNullable, ImportExportMixin, CertificationMixin, Model "description", "expression", "id", + "uuid", "is_certified", "metric_name", "warning_markdown", @@ -1193,6 +1197,7 @@ class SqlaTable( extra = Column(Text) normalize_columns = Column(Boolean, default=False) always_filter_main_dttm = Column(Boolean, default=False) + folders = Column(JSON, nullable=True) baselink = "tablemodelview" diff --git a/superset/datasets/api.py b/superset/datasets/api.py index b41f6395bd..cbdd8a5c9c 100644 --- a/superset/datasets/api.py +++ b/superset/datasets/api.py @@ -193,8 +193,10 @@ class DatasetRestApi(BaseSupersetModelRestApi): "metrics.id", "metrics.metric_name", "metrics.metric_type", + "metrics.uuid", "metrics.verbose_name", "metrics.warning_text", + "folders", "datasource_type", "url", "extra", @@ -620,9 +622,11 @@ class DatasetRestApi(BaseSupersetModelRestApi): return self.response(201, id=new_model.id, result=item) except DatasetInvalidError as ex: return self.response_422( - message=ex.normalized_messages() - if isinstance(ex, ValidationError) - else str(ex) + message=( + ex.normalized_messages() + if isinstance(ex, ValidationError) + else str(ex) + ) ) except DatasetCreateFailedError as ex: logger.error( @@ -1175,14 +1179,16 @@ class DatasetRestApi(BaseSupersetModelRestApi): def render_item_list(item_list: list[dict[str, Any]]) -> list[dict[str, Any]]: return [ - { - **item, - "rendered_expression": processor.process_template( - item["expression"] - ), - } - if item.get("expression") - else item + ( + { + **item, + "rendered_expression": processor.process_template( + item["expression"] + ), + } + if item.get("expression") + else item + ) for item in item_list ] diff --git a/superset/datasets/schemas.py b/superset/datasets/schemas.py index 1d271d3dae..56ada8a1b0 100644 --- a/superset/datasets/schemas.py +++ b/superset/datasets/schemas.py @@ -20,7 +20,7 @@ from typing import Any from dateutil.parser import isoparse from flask_babel import lazy_gettext as _ from marshmallow import fields, pre_load, Schema, ValidationError -from marshmallow.validate import Length +from marshmallow.validate import Length, OneOf from superset.exceptions import SupersetMarshmallowValidationError from superset.utils import json @@ -88,6 +88,18 @@ class DatasetMetricsPutSchema(Schema): uuid = fields.UUID(allow_none=True) +class FolderSchema(Schema): + uuid = fields.UUID() + type = fields.String( + required=False, + validate=OneOf(["metric", "column", "folder"]), + ) + name = fields.String(required=True, validate=Length(1, 250)) + description = fields.String(allow_none=True, validate=Length(0, 1000)) + # folder can contain metrics, columns, and subfolders: + children = fields.List(fields.Nested(lambda: FolderSchema()), allow_none=True) + + class DatasetPostSchema(Schema): database = fields.Integer(required=True) catalog = fields.String(allow_none=True, validate=Length(0, 250)) @@ -121,6 +133,7 @@ class DatasetPutSchema(Schema): owners = fields.List(fields.Integer()) columns = fields.List(fields.Nested(DatasetColumnsPutSchema)) metrics = fields.List(fields.Nested(DatasetMetricsPutSchema)) + folders = fields.List(fields.Nested(FolderSchema), required=False) extra = fields.String(allow_none=True) is_managed_externally = fields.Boolean(allow_none=True, dump_default=False) external_url = fields.String(allow_none=True) diff --git a/superset/migrations/versions/2025-03-03_20-52_94e7a3499973_add_folder_table.py b/superset/migrations/versions/2025-03-03_20-52_94e7a3499973_add_folder_table.py new file mode 100644 index 0000000000..e95e3bbac1 --- /dev/null +++ b/superset/migrations/versions/2025-03-03_20-52_94e7a3499973_add_folder_table.py @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Add folder table + +Revision ID: 94e7a3499973 +Revises: 74ad1125881c +Create Date: 2025-03-03 20:52:24.585143 + +""" + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.types import JSON + +# revision identifiers, used by Alembic. +revision = "94e7a3499973" +down_revision = "74ad1125881c" + + +def upgrade(): + op.add_column( + "tables", + sa.Column("folders", JSON, nullable=True), + ) + + +def downgrade(): + op.drop_column("tables", "folders") diff --git a/tests/integration_tests/datasets/api_tests.py b/tests/integration_tests/datasets/api_tests.py index 4f6aacfca6..d6f7f2db2a 100644 --- a/tests/integration_tests/datasets/api_tests.py +++ b/tests/integration_tests/datasets/api_tests.py @@ -254,7 +254,7 @@ class TestDatasetApi(SupersetTestCase): "sql", "table_name", ] - assert sorted(list(response["result"][0].keys())) == expected_columns # noqa: C414 + assert sorted(response["result"][0]) == expected_columns def test_get_dataset_list_gamma(self): """ @@ -1562,6 +1562,91 @@ class TestDatasetApi(SupersetTestCase): db.session.delete(dataset) db.session.commit() + def test_update_dataset_add_folders(self): + """ + Dataset API: Test adding folders to dataset + """ + self.login(username="admin") + + dataset = self.insert_default_dataset() + dataset_data = { + "folders": [ + { + "type": "folder", + "uuid": "b49ac3dd-c79b-42a4-9082-39ee74f3b369", + "name": "My metrics", + "children": [ + { + "type": "metric", + "uuid": dataset.metrics[0].uuid, + "name": dataset.metrics[0].metric_name, + }, + ], + }, + { + "type": "folder", + "uuid": "f5db85fa-75d6-45e5-bdce-c6194db80642", + "name": "My columns", + "children": [ + { + "type": "folder", + "uuid": "b5330233-e323-4157-b767-98b16f00ca93", + "name": "Dimensions", + "children": [ + { + "type": "column", + "uuid": dataset.columns[1].uuid, + "name": dataset.columns[1].column_name, + }, + ], + }, + ], + }, + ] + } + + uri = f"api/v1/dataset/{dataset.id}" + rv = self.put_assert_metric(uri, dataset_data, "put") + assert rv.status_code == 200 + + model = db.session.query(SqlaTable).get(dataset.id) + assert model.folders == [ + { + "uuid": "b49ac3dd-c79b-42a4-9082-39ee74f3b369", + "type": "folder", + "name": "My metrics", + "children": [ + { + "uuid": dataset.metrics[0].uuid, + "type": "metric", + "name": "count", + } + ], + }, + { + "uuid": "f5db85fa-75d6-45e5-bdce-c6194db80642", + "type": "folder", + "name": "My columns", + "children": [ + { + "uuid": "b5330233-e323-4157-b767-98b16f00ca93", + "type": "folder", + "name": "Dimensions", + "children": [ + { + "uuid": dataset.columns[1].uuid, + "type": "column", + "name": "name", + } + ], + } + ], + }, + ] + + db.session.delete(dataset) + db.session.commit() + def test_delete_dataset_item(self): """ Dataset API: Test delete dataset item diff --git a/tests/unit_tests/commands/dataset/test_update.py b/tests/unit_tests/commands/dataset/test_update.py index 9e99edf204..1aa2675eb4 100644 --- a/tests/unit_tests/commands/dataset/test_update.py +++ b/tests/unit_tests/commands/dataset/test_update.py @@ -17,12 +17,14 @@ from unittest.mock import MagicMock import pytest +from marshmallow import ValidationError from pytest_mock import MockerFixture from superset import db from superset.commands.dataset.exceptions import DatasetInvalidError -from superset.commands.dataset.update import UpdateDatasetCommand +from superset.commands.dataset.update import UpdateDatasetCommand, validate_folders from superset.connectors.sqla.models import SqlaTable +from superset.datasets.schema import FolderSchema from superset.models.core import Database @@ -58,3 +60,194 @@ def test_update_uniqueness_error(mocker: MockerFixture) -> None: "schema": "qux", }, ).run() + + +def test_validate_folders(mocker: MockerFixture) -> None: + """ + Test the folder validation. + """ + metrics = [mocker.MagicMock(metric_name="metric1", uuid="uuid1")] + columns = [ + mocker.MagicMock(column_name="column1", uuid="uuid2"), + mocker.MagicMock(column_name="column2", uuid="uuid3"), + ] + + validate_folders(folders=[], metrics=metrics, columns=columns) + + folders: list[FolderSchema] = [ + { + "uuid": "uuid4", + "type": "folder", + "name": "My folder", + "children": [ + { + "uuid": "uuid1", + "type": "metric", + "name": "metric1", + }, + { + "uuid": "uuid2", + "type": "column", + "name": "column1", + }, + { + "uuid": "uuid3", + "type": "column", + "name": "column2", + }, + ], + }, + ] + validate_folders(folders=folders, metrics=metrics, columns=columns) + + +def test_validate_folders_cycle(mocker: MockerFixture) -> None: + """ + Test that we can detect cycles in the folder structure. + """ + folders: list[FolderSchema] = [ + { + "uuid": "uuid1", + "type": "folder", + "name": "My folder", + "children": [ + { + "uuid": "uuid2", + "type": "folder", + "name": "My other folder", + "children": [ + { + "uuid": "uuid1", + "type": "folder", + "name": "My folder", + "children": [], + }, + ], + }, + ], + }, + ] + + with pytest.raises(ValidationError) as excinfo: + validate_folders(folders=folders, metrics=[], columns=[]) + assert str(excinfo.value) == "Found cycle in folder structure" + + +def test_validate_folders_duplicate_name(mocker: MockerFixture) -> None: + """ + Test that we can detect duplicate folder names in the folder structure. + """ + folders: list[FolderSchema] = [ + { + "uuid": "uuid1", + "type": "folder", + "name": "My folder", + "children": [ + { + "uuid": "uuid2", + "type": "folder", + "name": "My other folder", + "children": [ + { + "uuid": "uuid3", + "type": "folder", + "name": "My folder", + "children": [], + }, + ], + }, + ], + }, + ] + + with pytest.raises(ValidationError) as excinfo: + validate_folders(folders=folders, metrics=[], columns=[]) + assert str(excinfo.value) == "Duplicate folder name: My folder" + + +def test_validate_folders_invalid_names(mocker: MockerFixture) -> None: + """ + Test that we can detect reserved folder names. + """ + folders_with_metrics: list[FolderSchema] = [ + { + "uuid": "uuid1", + "type": "folder", + "name": "Metrics", + "children": [], + }, + ] + folders_with_columns: list[FolderSchema] = [ + { + "uuid": "uuid1", + "type": "folder", + "name": "Columns", + "children": [], + }, + ] + + with pytest.raises(ValidationError) as excinfo: + validate_folders(folders=folders_with_metrics, metrics=[], columns=[]) + assert str(excinfo.value) == "Folder cannot have name 'Metrics'" + + with pytest.raises(ValidationError) as excinfo: + validate_folders(folders=folders_with_columns, metrics=[], columns=[]) + assert str(excinfo.value) == "Folder cannot have name 'Columns'" + + +def test_validate_folders_invalid_uuid(mocker: MockerFixture) -> None: + """ + Test that we can detect invalid UUIDs. + """ + metrics = [mocker.MagicMock(metric_name="metric1", uuid="uuid1")] + columns = [ + mocker.MagicMock(column_name="column1", uuid="uuid2"), + mocker.MagicMock(column_name="column2", uuid="uuid3"), + ] + folders: list[FolderSchema] = [ + { + "uuid": "uuid4", + "type": "folder", + "name": "My folder", + "children": [ + { + "uuid": "uuid2", + "type": "metric", + "name": "metric1", + }, + ], + }, + ] + + with pytest.raises(ValidationError) as excinfo: + validate_folders(folders=folders, metrics=metrics, columns=columns) + assert str(excinfo.value) == "Invalid UUID for metric 'metric1': uuid2" + + +def test_validate_folders_mismatched_name(mocker: MockerFixture) -> None: + """ + Test that we can detect mismatched names. + """ + metrics = [mocker.MagicMock(metric_name="metric1", uuid="uuid1")] + columns = [ + mocker.MagicMock(column_name="column1", uuid="uuid2"), + mocker.MagicMock(column_name="column2", uuid="uuid3"), + ] + folders: list[FolderSchema] = [ + { + "uuid": "uuid4", + "type": "folder", + "name": "My folder", + "children": [ + { + "uuid": "uuid1", + "type": "metric", + "name": "metric2", + }, + ], + }, + ] + + with pytest.raises(ValidationError) as excinfo: + validate_folders(folders=folders, metrics=metrics, columns=columns) + assert str(excinfo.value) == "Mismatched name 'metric2' for UUID 'uuid1'"
