This is an automated email from the ASF dual-hosted git repository. beto pushed a commit to branch folder-api in repository https://gitbox.apache.org/repos/asf/superset.git
commit 11b8d36209ddf7dcd87417c556eacc68224eb4fd Author: Beto Dealmeida <[email protected]> AuthorDate: Mon Mar 3 15:48:57 2025 -0500 feat: dataset folders (backend) --- superset/commands/dataset/update.py | 58 ++++++++++++++++++++-- superset/connectors/sqla/models.py | 7 ++- superset/datasets/api.py | 28 +++++++---- superset/datasets/schemas.py | 15 +++++- ...25-03-03_20-52_94e7a3499973_add_folder_table.py | 42 ++++++++++++++++ 5 files changed, 134 insertions(+), 16 deletions(-) diff --git a/superset/commands/dataset/update.py b/superset/commands/dataset/update.py index 2772cc0ffa..abd7378486 100644 --- a/superset/commands/dataset/update.py +++ b/superset/commands/dataset/update.py @@ -20,7 +20,7 @@ from functools import partial from typing import Any, Optional from flask_appbuilder.models.sqla import Model -from marshmallow import ValidationError +from marshmallow import fields, ValidationError from sqlalchemy.exc import SQLAlchemyError from superset import security_manager @@ -41,6 +41,7 @@ from superset.commands.dataset.exceptions import ( ) from superset.connectors.sqla.models import SqlaTable from superset.daos.dataset import DatasetDAO +from superset.datasets.schemas import FolderSchema from superset.exceptions import SupersetSecurityException from superset.sql_parse import Table from superset.utils.decorators import on_error, transaction @@ -127,14 +128,19 @@ class UpdateDatasetCommand(UpdateMixin, BaseCommand): except ValidationError as ex: exceptions.append(ex) - # Validate columns if columns := self._properties.get("columns"): self._validate_columns(columns, exceptions) - # Validate metrics if metrics := self._properties.get("metrics"): self._validate_metrics(metrics, exceptions) + if folders := self._properties.get("folders"): + try: + schema = fields.List(fields.Nested(FolderSchema())) + self.validate_folders(schema.load(folders)) + except ValidationError as ex: + exceptions.append(ex) + if exceptions: raise DatasetInvalidError(exceptions=exceptions) @@ 
def validate_folders(self, folders: "list[FolderSchema]") -> None:
    """
    Additional folder validation.

    The marshmallow schema validates the shape of each entry; this method walks
    the whole folder tree breadth-first and additionally checks that:

    - no UUID appears more than once (reported as a cycle);
    - folder names are unique among folders and are not the reserved names
      "metrics" or "columns" (compared case-insensitively);
    - every ``metric``/``column`` entry references a UUID found in the
      ``metrics``/``columns`` of ``self._properties``, under the same name.

    :param folders: top-level entries of the folder tree. Each entry may be a
        mapping (what a marshmallow ``load`` produces by default) or an object
        exposing ``uuid``, ``type``, ``name`` and ``children`` attributes.
        The list is not modified.
    :raises ValidationError: on the first violation found.
    """
    # Function-scope imports keep this block self-contained.
    from collections import deque
    from collections.abc import Mapping

    def read(entry, key):
        """Return ``key`` from a mapping or attribute-style entry, else None."""
        if isinstance(entry, Mapping):
            return entry.get(key)
        return getattr(entry, key, None)

    # Rows without a UUID cannot be referenced from a folder, so they are left
    # out of the lookup instead of raising ``KeyError``.
    existing = {
        "metric": {
            metric["uuid"]: metric.get("metric_name")
            for metric in self._properties.get("metrics") or []
            if metric.get("uuid") is not None
        },
        "column": {
            column["uuid"]: column.get("column_name")
            for column in self._properties.get("columns") or []
            if column.get("uuid") is not None
        },
    }

    # Work on a copy so the caller's list is left intact; a deque gives O(1)
    # pops from the left and the loop ends when it is drained.
    queue = deque(folders or [])
    seen_uuids = set()
    seen_folder_names = set()
    while queue:
        entry = queue.popleft()
        entry_uuid = read(entry, "uuid")
        entry_type = read(entry, "type")
        entry_name = read(entry, "name")

        if entry_uuid in seen_uuids:
            raise ValidationError("Found cycle in folder structure")
        seen_uuids.add(entry_uuid)

        if entry_type == "folder":
            # Only folder names are tracked, so the outcome does not depend on
            # whether a same-named metric/column was visited earlier.
            if entry_name in seen_folder_names:
                raise ValidationError(f"Duplicate folder name: {entry_name}")
            seen_folder_names.add(entry_name)

            if (entry_name or "").lower() in {"metrics", "columns"}:
                raise ValidationError(f"Folder cannot have name '{entry_name}'")

        if entry_type in {"metric", "column"}:
            known = existing[entry_type]
            if entry_uuid not in known:
                raise ValidationError(f"Invalid UUID: {entry_uuid}")
            if entry_name != known[entry_uuid]:
                raise ValidationError(
                    f"Mismatched name '{entry_name}' for UUID '{entry_uuid}'"
                )

        # ``children`` may be absent or None for leaf entries.
        children = read(entry, "children")
        if children:
            queue.extend(children)
superset.commands.dataset.exceptions import DatasetNotFoundError @@ -400,6 +401,7 @@ class BaseDatasource(AuditMixinNullable, ImportExportMixin): # pylint: disable= # one to many "columns": [o.data for o in self.columns], "metrics": [o.data for o in self.metrics], + "folders": self.folders, # TODO deprecate, move logic to JS "order_by_choices": self.order_by_choices, "owners": [owner.id for owner in self.owners], @@ -1018,6 +1020,7 @@ class TableColumn(AuditMixinNullable, ImportExportMixin, CertificationMixin, Mod "filterable", "groupby", "id", + "uuid", "is_certified", "is_dttm", "python_date_format", @@ -1065,7 +1068,7 @@ class SqlMetric(AuditMixinNullable, ImportExportMixin, CertificationMixin, Model "extra", "warning_text", ] - update_from_object_fields = list(s for s in export_fields if s != "table_id") # noqa: C400 + update_from_object_fields = [s for s in export_fields if s != "table_id"] export_parent = "table" def __repr__(self) -> str: @@ -1117,6 +1120,7 @@ class SqlMetric(AuditMixinNullable, ImportExportMixin, CertificationMixin, Model "description", "expression", "id", + "uuid", "is_certified", "metric_name", "warning_markdown", @@ -1193,6 +1197,7 @@ class SqlaTable( extra = Column(Text) normalize_columns = Column(Boolean, default=False) always_filter_main_dttm = Column(Boolean, default=False) + folders = Column(JSON, nullable=True) baselink = "tablemodelview" diff --git a/superset/datasets/api.py b/superset/datasets/api.py index b41f6395bd..cbdd8a5c9c 100644 --- a/superset/datasets/api.py +++ b/superset/datasets/api.py @@ -193,8 +193,10 @@ class DatasetRestApi(BaseSupersetModelRestApi): "metrics.id", "metrics.metric_name", "metrics.metric_type", + "metrics.uuid", "metrics.verbose_name", "metrics.warning_text", + "folders", "datasource_type", "url", "extra", @@ -620,9 +622,11 @@ class DatasetRestApi(BaseSupersetModelRestApi): return self.response(201, id=new_model.id, result=item) except DatasetInvalidError as ex: return self.response_422( - 
class FolderSchema(Schema):
    """
    Schema for one entry of a dataset's folder tree.

    An entry is typed as a ``metric``, a ``column`` or a ``folder``; entries
    nest through ``children``, which uses this same schema recursively.
    """

    uuid = fields.UUID()
    # ``type`` is optional, but when given it must be one of the three kinds.
    type = fields.String(
        validate=OneOf(["metric", "column", "folder"]),
        required=False,
    )
    name = fields.String(
        validate=Length(min=1, max=250),
        required=True,
    )
    description = fields.String(
        validate=Length(min=0, max=1000),
        allow_none=True,
    )
    # folder can contain metrics, columns, and subfolders; the lambda defers
    # the reference to ``FolderSchema`` until the class has been created.
    children = fields.List(
        fields.Nested(lambda: FolderSchema()),
        allow_none=True,
    )
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""Add folder table

Revision ID: 94e7a3499973
Revises: 74ad1125881c
Create Date: 2025-03-03 20:52:24.585143

"""

import sqlalchemy as sa
from alembic import op
from sqlalchemy.types import JSON

# revision identifiers, used by Alembic.
revision = "94e7a3499973"
down_revision = "74ad1125881c"


def upgrade():
    """Add a nullable JSON ``folders`` column to the ``tables`` table."""
    folders_column = sa.Column("folders", JSON, nullable=True)
    op.add_column("tables", folders_column)


def downgrade():
    """Drop the ``folders`` column from the ``tables`` table."""
    op.drop_column("tables", "folders")
