This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch folder-api
in repository https://gitbox.apache.org/repos/asf/superset.git

commit b03aa11420515cb521979ea7dff619f310693a0f
Author: Beto Dealmeida <[email protected]>
AuthorDate: Mon Mar 3 15:48:57 2025 -0500

    feat: dataset folders (backend)
---
 superset/commands/dataset/update.py                |  74 +++++++-
 superset/connectors/sqla/models.py                 |   7 +-
 superset/datasets/api.py                           |  28 +--
 superset/datasets/schemas.py                       |  15 +-
 ...25-03-03_20-52_94e7a3499973_add_folder_table.py |  42 +++++
 tests/integration_tests/datasets/api_tests.py      |  87 ++++++++-
 tests/unit_tests/commands/dataset/test_update.py   | 195 ++++++++++++++++++++-
 7 files changed, 427 insertions(+), 21 deletions(-)

diff --git a/superset/commands/dataset/update.py b/superset/commands/dataset/update.py
index 2772cc0ffa..a84248ca55 100644
--- a/superset/commands/dataset/update.py
+++ b/superset/commands/dataset/update.py
@@ -17,7 +17,7 @@
 import logging
 from collections import Counter
 from functools import partial
-from typing import Any, Optional
+from typing import Any, cast, Optional
 
 from flask_appbuilder.models.sqla import Model
 from marshmallow import ValidationError
@@ -39,8 +39,9 @@ from superset.commands.dataset.exceptions import (
     DatasetNotFoundError,
     DatasetUpdateFailedError,
 )
-from superset.connectors.sqla.models import SqlaTable
+from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
 from superset.daos.dataset import DatasetDAO
+from superset.datasets.schemas import FolderSchema
 from superset.exceptions import SupersetSecurityException
 from superset.sql_parse import Table
 from superset.utils.decorators import on_error, transaction
@@ -127,16 +128,30 @@ class UpdateDatasetCommand(UpdateMixin, BaseCommand):
         except ValidationError as ex:
             exceptions.append(ex)
 
-        # Validate columns
+        self._validate_semantics(exceptions)
+
+        if exceptions:
+            raise DatasetInvalidError(exceptions=exceptions)
+
+    def _validate_semantics(self, exceptions: list[ValidationError]) -> None:
+        # we know we have a valid model
+        self._model = cast(SqlaTable, self._model)
+
         if columns := self._properties.get("columns"):
             self._validate_columns(columns, exceptions)
 
-        # Validate metrics
         if metrics := self._properties.get("metrics"):
             self._validate_metrics(metrics, exceptions)
 
-        if exceptions:
-            raise DatasetInvalidError(exceptions=exceptions)
+        if folders := self._properties.get("folders"):
+            try:
+                validate_folders(folders, self._model.metrics, self._model.columns)
+            except ValidationError as ex:
+                exceptions.append(ex)
+
+            # dump schema to convert UUID to string
+            schema = FolderSchema(many=True)
+            self._properties["folders"] = schema.dump(folders)
 
     def _validate_columns(
         self, columns: list[dict[str, Any]], exceptions: list[ValidationError]
@@ -189,3 +204,50 @@ class UpdateDatasetCommand(UpdateMixin, BaseCommand):
             if count > 1
         ]
         return duplicates
+
+
+def validate_folders(
+    folders: list[FolderSchema],
+    metrics: list[SqlMetric],
+    columns: list[TableColumn],
+) -> None:
+    """
+    Additional folder validation.
+
+    The marshmallow schema will validate the folder structure, but we still need to
+    check that UUIDs are valid, names are unique, and that there are no cycles.
+    """
+    existing = {
+        "metric": {metric.uuid: metric.metric_name for metric in metrics},
+        "column": {column.uuid: column.column_name for column in columns},
+    }
+
+    queue = folders.copy()
+    seen_uuids = set()
+    seen_names = set()
+    while queue:
+        obj = queue.pop(0)
+        uuid, name, type = obj["uuid"], obj["name"], obj["type"]
+
+        if uuid in seen_uuids:
+            raise ValidationError("Found cycle in folder structure")
+        seen_uuids.add(uuid)
+
+        if type == "folder" and name in seen_names:
+            raise ValidationError(f"Duplicate folder name: {name}")
+        seen_names.add(name)
+
+        if type == "folder" and name.lower() in {
+            "metrics",
+            "columns",
+        }:
+            raise ValidationError(f"Folder cannot have name '{name}'")
+
+        if type in {"metric", "column"}:
+            if uuid not in existing[type]:
+                raise ValidationError(f"Invalid UUID for {type} '{name}': {uuid}")
+            if name != existing[type][uuid]:
+                raise ValidationError(f"Mismatched name '{name}' for UUID '{uuid}'")
+
+        if children := obj.get("children"):
+            queue.extend(children)
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
index 6478fdf075..bbcf86d634 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -69,6 +69,7 @@ from sqlalchemy.sql import column, ColumnElement, 
literal_column, table
 from sqlalchemy.sql.elements import ColumnClause, TextClause
 from sqlalchemy.sql.expression import Label
 from sqlalchemy.sql.selectable import Alias, TableClause
+from sqlalchemy.types import JSON
 
 from superset import app, db, is_feature_enabled, security_manager
 from superset.commands.dataset.exceptions import DatasetNotFoundError
@@ -400,6 +401,7 @@ class BaseDatasource(AuditMixinNullable, 
ImportExportMixin):  # pylint: disable=
             # one to many
             "columns": [o.data for o in self.columns],
             "metrics": [o.data for o in self.metrics],
+            "folders": self.folders,
             # TODO deprecate, move logic to JS
             "order_by_choices": self.order_by_choices,
             "owners": [owner.id for owner in self.owners],
@@ -1018,6 +1020,7 @@ class TableColumn(AuditMixinNullable, ImportExportMixin, 
CertificationMixin, Mod
             "filterable",
             "groupby",
             "id",
+            "uuid",
             "is_certified",
             "is_dttm",
             "python_date_format",
@@ -1065,7 +1068,7 @@ class SqlMetric(AuditMixinNullable, ImportExportMixin, 
CertificationMixin, Model
         "extra",
         "warning_text",
     ]
-    update_from_object_fields = list(s for s in export_fields if s != "table_id")  # noqa: C400
+    update_from_object_fields = [s for s in export_fields if s != "table_id"]
     export_parent = "table"
 
     def __repr__(self) -> str:
@@ -1117,6 +1120,7 @@ class SqlMetric(AuditMixinNullable, ImportExportMixin, 
CertificationMixin, Model
             "description",
             "expression",
             "id",
+            "uuid",
             "is_certified",
             "metric_name",
             "warning_markdown",
@@ -1193,6 +1197,7 @@ class SqlaTable(
     extra = Column(Text)
     normalize_columns = Column(Boolean, default=False)
     always_filter_main_dttm = Column(Boolean, default=False)
+    folders = Column(JSON, nullable=True)
 
     baselink = "tablemodelview"
 
diff --git a/superset/datasets/api.py b/superset/datasets/api.py
index b41f6395bd..cbdd8a5c9c 100644
--- a/superset/datasets/api.py
+++ b/superset/datasets/api.py
@@ -193,8 +193,10 @@ class DatasetRestApi(BaseSupersetModelRestApi):
         "metrics.id",
         "metrics.metric_name",
         "metrics.metric_type",
+        "metrics.uuid",
         "metrics.verbose_name",
         "metrics.warning_text",
+        "folders",
         "datasource_type",
         "url",
         "extra",
@@ -620,9 +622,11 @@ class DatasetRestApi(BaseSupersetModelRestApi):
             return self.response(201, id=new_model.id, result=item)
         except DatasetInvalidError as ex:
             return self.response_422(
-                message=ex.normalized_messages()
-                if isinstance(ex, ValidationError)
-                else str(ex)
+                message=(
+                    ex.normalized_messages()
+                    if isinstance(ex, ValidationError)
+                    else str(ex)
+                )
             )
         except DatasetCreateFailedError as ex:
             logger.error(
@@ -1175,14 +1179,16 @@ class DatasetRestApi(BaseSupersetModelRestApi):
 
         def render_item_list(item_list: list[dict[str, Any]]) -> 
list[dict[str, Any]]:
             return [
-                {
-                    **item,
-                    "rendered_expression": processor.process_template(
-                        item["expression"]
-                    ),
-                }
-                if item.get("expression")
-                else item
+                (
+                    {
+                        **item,
+                        "rendered_expression": processor.process_template(
+                            item["expression"]
+                        ),
+                    }
+                    if item.get("expression")
+                    else item
+                )
                 for item in item_list
             ]
 
diff --git a/superset/datasets/schemas.py b/superset/datasets/schemas.py
index 1d271d3dae..56ada8a1b0 100644
--- a/superset/datasets/schemas.py
+++ b/superset/datasets/schemas.py
@@ -20,7 +20,7 @@ from typing import Any
 from dateutil.parser import isoparse
 from flask_babel import lazy_gettext as _
 from marshmallow import fields, pre_load, Schema, ValidationError
-from marshmallow.validate import Length
+from marshmallow.validate import Length, OneOf
 
 from superset.exceptions import SupersetMarshmallowValidationError
 from superset.utils import json
@@ -88,6 +88,18 @@ class DatasetMetricsPutSchema(Schema):
     uuid = fields.UUID(allow_none=True)
 
 
+class FolderSchema(Schema):
+    uuid = fields.UUID()
+    type = fields.String(
+        required=False,
+        validate=OneOf(["metric", "column", "folder"]),
+    )
+    name = fields.String(required=True, validate=Length(1, 250))
+    description = fields.String(allow_none=True, validate=Length(0, 1000))
+    # folder can contain metrics, columns, and subfolders:
+    children = fields.List(fields.Nested(lambda: FolderSchema()), allow_none=True)
+
+
 class DatasetPostSchema(Schema):
     database = fields.Integer(required=True)
     catalog = fields.String(allow_none=True, validate=Length(0, 250))
@@ -121,6 +133,7 @@ class DatasetPutSchema(Schema):
     owners = fields.List(fields.Integer())
     columns = fields.List(fields.Nested(DatasetColumnsPutSchema))
     metrics = fields.List(fields.Nested(DatasetMetricsPutSchema))
+    folders = fields.List(fields.Nested(FolderSchema), required=False)
     extra = fields.String(allow_none=True)
     is_managed_externally = fields.Boolean(allow_none=True, dump_default=False)
     external_url = fields.String(allow_none=True)
diff --git a/superset/migrations/versions/2025-03-03_20-52_94e7a3499973_add_folder_table.py b/superset/migrations/versions/2025-03-03_20-52_94e7a3499973_add_folder_table.py
new file mode 100644
index 0000000000..e95e3bbac1
--- /dev/null
+++ b/superset/migrations/versions/2025-03-03_20-52_94e7a3499973_add_folder_table.py
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Add folder table
+
+Revision ID: 94e7a3499973
+Revises: 74ad1125881c
+Create Date: 2025-03-03 20:52:24.585143
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.types import JSON
+
+# revision identifiers, used by Alembic.
+revision = "94e7a3499973"
+down_revision = "74ad1125881c"
+
+
+def upgrade():
+    op.add_column(
+        "tables",
+        sa.Column("folders", JSON, nullable=True),
+    )
+
+
+def downgrade():
+    op.drop_column("tables", "folders")
diff --git a/tests/integration_tests/datasets/api_tests.py b/tests/integration_tests/datasets/api_tests.py
index 4f6aacfca6..d6f7f2db2a 100644
--- a/tests/integration_tests/datasets/api_tests.py
+++ b/tests/integration_tests/datasets/api_tests.py
@@ -254,7 +254,7 @@ class TestDatasetApi(SupersetTestCase):
             "sql",
             "table_name",
         ]
-        assert sorted(list(response["result"][0].keys())) == expected_columns  # noqa: C414
+        assert sorted(response["result"][0]) == expected_columns
 
     def test_get_dataset_list_gamma(self):
         """
@@ -1562,6 +1562,91 @@ class TestDatasetApi(SupersetTestCase):
         db.session.delete(dataset)
         db.session.commit()
 
+    def test_update_dataset_add_folders(self):
+        """
+        Dataset API: Test adding folders to dataset
+        """
+        self.login(username="admin")
+
+        dataset = self.insert_default_dataset()
+        dataset_data = {
+            "folders": [
+                {
+                    "type": "folder",
+                    "uuid": "b49ac3dd-c79b-42a4-9082-39ee74f3b369",
+                    "name": "My metrics",
+                    "children": [
+                        {
+                            "type": "metric",
+                            "uuid": dataset.metrics[0].uuid,
+                            "name": dataset.metrics[0].metric_name,
+                        },
+                    ],
+                },
+                {
+                    "type": "folder",
+                    "uuid": "f5db85fa-75d6-45e5-bdce-c6194db80642",
+                    "name": "My columns",
+                    "children": [
+                        {
+                            "type": "folder",
+                            "uuid": "b5330233-e323-4157-b767-98b16f00ca93",
+                            "name": "Dimensions",
+                            "children": [
+                                {
+                                    "type": "column",
+                                    "uuid": dataset.columns[1].uuid,
+                                    "name": dataset.columns[1].column_name,
+                                },
+                            ],
+                        },
+                    ],
+                },
+            ]
+        }
+
+        uri = f"api/v1/dataset/{dataset.id}"
+        rv = self.put_assert_metric(uri, dataset_data, "put")
+        assert rv.status_code == 200
+
+        model = db.session.query(SqlaTable).get(dataset.id)
+        assert model.folders == [
+            {
+                "uuid": "b49ac3dd-c79b-42a4-9082-39ee74f3b369",
+                "type": "folder",
+                "name": "My metrics",
+                "children": [
+                    {
+                        "uuid": dataset.metrics[0].uuid,
+                        "type": "metric",
+                        "name": "count",
+                    }
+                ],
+            },
+            {
+                "uuid": "f5db85fa-75d6-45e5-bdce-c6194db80642",
+                "type": "folder",
+                "name": "My columns",
+                "children": [
+                    {
+                        "uuid": "b5330233-e323-4157-b767-98b16f00ca93",
+                        "type": "folder",
+                        "name": "Dimensions",
+                        "children": [
+                            {
+                                "uuid": dataset.columns[1].uuid,
+                                "type": "column",
+                                "name": "name",
+                            }
+                        ],
+                    }
+                ],
+            },
+        ]
+
+        db.session.delete(dataset)
+        db.session.commit()
+
     def test_delete_dataset_item(self):
         """
         Dataset API: Test delete dataset item
diff --git a/tests/unit_tests/commands/dataset/test_update.py b/tests/unit_tests/commands/dataset/test_update.py
index 9e99edf204..1aa2675eb4 100644
--- a/tests/unit_tests/commands/dataset/test_update.py
+++ b/tests/unit_tests/commands/dataset/test_update.py
@@ -17,12 +17,14 @@
 from unittest.mock import MagicMock
 
 import pytest
+from marshmallow import ValidationError
 from pytest_mock import MockerFixture
 
 from superset import db
 from superset.commands.dataset.exceptions import DatasetInvalidError
-from superset.commands.dataset.update import UpdateDatasetCommand
+from superset.commands.dataset.update import UpdateDatasetCommand, validate_folders
 from superset.connectors.sqla.models import SqlaTable
+from superset.datasets.schemas import FolderSchema
 from superset.models.core import Database
 
 
@@ -58,3 +60,194 @@ def test_update_uniqueness_error(mocker: MockerFixture) -> None:
                 "schema": "qux",
             },
         ).run()
+
+
+def test_validate_folders(mocker: MockerFixture) -> None:
+    """
+    Test the folder validation.
+    """
+    metrics = [mocker.MagicMock(metric_name="metric1", uuid="uuid1")]
+    columns = [
+        mocker.MagicMock(column_name="column1", uuid="uuid2"),
+        mocker.MagicMock(column_name="column2", uuid="uuid3"),
+    ]
+
+    validate_folders(folders=[], metrics=metrics, columns=columns)
+
+    folders: list[FolderSchema] = [
+        {
+            "uuid": "uuid4",
+            "type": "folder",
+            "name": "My folder",
+            "children": [
+                {
+                    "uuid": "uuid1",
+                    "type": "metric",
+                    "name": "metric1",
+                },
+                {
+                    "uuid": "uuid2",
+                    "type": "column",
+                    "name": "column1",
+                },
+                {
+                    "uuid": "uuid3",
+                    "type": "column",
+                    "name": "column2",
+                },
+            ],
+        },
+    ]
+    validate_folders(folders=folders, metrics=metrics, columns=columns)
+
+
+def test_validate_folders_cycle(mocker: MockerFixture) -> None:
+    """
+    Test that we can detect cycles in the folder structure.
+    """
+    folders: list[FolderSchema] = [
+        {
+            "uuid": "uuid1",
+            "type": "folder",
+            "name": "My folder",
+            "children": [
+                {
+                    "uuid": "uuid2",
+                    "type": "folder",
+                    "name": "My other folder",
+                    "children": [
+                        {
+                            "uuid": "uuid1",
+                            "type": "folder",
+                            "name": "My folder",
+                            "children": [],
+                        },
+                    ],
+                },
+            ],
+        },
+    ]
+
+    with pytest.raises(ValidationError) as excinfo:
+        validate_folders(folders=folders, metrics=[], columns=[])
+    assert str(excinfo.value) == "Found cycle in folder structure"
+
+
+def test_validate_folders_duplicate_name(mocker: MockerFixture) -> None:
+    """
+    Test that we can detect duplicate folder names in the folder structure.
+    """
+    folders: list[FolderSchema] = [
+        {
+            "uuid": "uuid1",
+            "type": "folder",
+            "name": "My folder",
+            "children": [
+                {
+                    "uuid": "uuid2",
+                    "type": "folder",
+                    "name": "My other folder",
+                    "children": [
+                        {
+                            "uuid": "uuid3",
+                            "type": "folder",
+                            "name": "My folder",
+                            "children": [],
+                        },
+                    ],
+                },
+            ],
+        },
+    ]
+
+    with pytest.raises(ValidationError) as excinfo:
+        validate_folders(folders=folders, metrics=[], columns=[])
+    assert str(excinfo.value) == "Duplicate folder name: My folder"
+
+
+def test_validate_folders_invalid_names(mocker: MockerFixture) -> None:
+    """
+    Test that we can detect reserved folder names.
+    """
+    folders_with_metrics: list[FolderSchema] = [
+        {
+            "uuid": "uuid1",
+            "type": "folder",
+            "name": "Metrics",
+            "children": [],
+        },
+    ]
+    folders_with_columns: list[FolderSchema] = [
+        {
+            "uuid": "uuid1",
+            "type": "folder",
+            "name": "Columns",
+            "children": [],
+        },
+    ]
+
+    with pytest.raises(ValidationError) as excinfo:
+        validate_folders(folders=folders_with_metrics, metrics=[], columns=[])
+    assert str(excinfo.value) == "Folder cannot have name 'Metrics'"
+
+    with pytest.raises(ValidationError) as excinfo:
+        validate_folders(folders=folders_with_columns, metrics=[], columns=[])
+    assert str(excinfo.value) == "Folder cannot have name 'Columns'"
+
+
+def test_validate_folders_invalid_uuid(mocker: MockerFixture) -> None:
+    """
+    Test that we can detect invalid UUIDs.
+    """
+    metrics = [mocker.MagicMock(metric_name="metric1", uuid="uuid1")]
+    columns = [
+        mocker.MagicMock(column_name="column1", uuid="uuid2"),
+        mocker.MagicMock(column_name="column2", uuid="uuid3"),
+    ]
+    folders: list[FolderSchema] = [
+        {
+            "uuid": "uuid4",
+            "type": "folder",
+            "name": "My folder",
+            "children": [
+                {
+                    "uuid": "uuid2",
+                    "type": "metric",
+                    "name": "metric1",
+                },
+            ],
+        },
+    ]
+
+    with pytest.raises(ValidationError) as excinfo:
+        validate_folders(folders=folders, metrics=metrics, columns=columns)
+    assert str(excinfo.value) == "Invalid UUID for metric 'metric1': uuid2"
+
+
+def test_validate_folders_mismatched_name(mocker: MockerFixture) -> None:
+    """
+    Test that we can detect mismatched names.
+    """
+    metrics = [mocker.MagicMock(metric_name="metric1", uuid="uuid1")]
+    columns = [
+        mocker.MagicMock(column_name="column1", uuid="uuid2"),
+        mocker.MagicMock(column_name="column2", uuid="uuid3"),
+    ]
+    folders: list[FolderSchema] = [
+        {
+            "uuid": "uuid4",
+            "type": "folder",
+            "name": "My folder",
+            "children": [
+                {
+                    "uuid": "uuid1",
+                    "type": "metric",
+                    "name": "metric2",
+                },
+            ],
+        },
+    ]
+
+    with pytest.raises(ValidationError) as excinfo:
+        validate_folders(folders=folders, metrics=metrics, columns=columns)
+    assert str(excinfo.value) == "Mismatched name 'metric2' for UUID 'uuid1'"

Reply via email to