This is an automated email from the ASF dual-hosted git repository.

elizabeth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 7ab8534ef6 feat: dataset folders (backend) (#32520)
7ab8534ef6 is described below

commit 7ab8534ef693240c55e49d124d77a5a01e17d860
Author: Beto Dealmeida <[email protected]>
AuthorDate: Fri Apr 11 14:38:08 2025 -0400

    feat: dataset folders (backend) (#32520)
    
    Co-authored-by: Maxime Beauchemin <[email protected]>
---
 .github/workflows/superset-python-unittest.yml     |   2 +-
 superset-frontend/package-lock.json                |   2 +-
 .../SqlLab/components/SqlEditorTabHeader/index.tsx |   4 +-
 superset/commands/dataset/update.py                |  83 ++++-
 superset/config.py                                 |   3 +
 superset/connectors/sqla/models.py                 |   8 +-
 superset/datasets/api.py                           |  38 ++-
 superset/datasets/schemas.py                       |  35 +-
 tests/integration_tests/charts/api_tests.py        |  16 +-
 tests/integration_tests/datasets/api_tests.py      |  93 ++++-
 tests/integration_tests/datasets/commands_tests.py |   2 +
 tests/unit_tests/commands/dataset/test_update.py   | 379 +++++++++++++++++++--
 tests/unit_tests/datasets/commands/export_test.py  |  67 ++++
 .../datasets/commands/importers/v1/import_test.py  | 166 +++++++++
 14 files changed, 838 insertions(+), 60 deletions(-)

diff --git a/.github/workflows/superset-python-unittest.yml 
b/.github/workflows/superset-python-unittest.yml
index c4cef8de24..a8feed453c 100644
--- a/.github/workflows/superset-python-unittest.yml
+++ b/.github/workflows/superset-python-unittest.yml
@@ -44,7 +44,7 @@ jobs:
           SUPERSET_TESTENV: true
           SUPERSET_SECRET_KEY: not-a-secret
         run: |
-          pytest --durations-min=0.5 --cov-report= --cov=superset 
./tests/common ./tests/unit_tests --cache-clear
+          pytest --durations-min=0.5 --cov-report= --cov=superset 
./tests/common ./tests/unit_tests --cache-clear --maxfail=50
       - name: Upload code coverage
         uses: codecov/codecov-action@v5
         with:
diff --git a/superset-frontend/package-lock.json 
b/superset-frontend/package-lock.json
index 8c070cc2d0..3749e68dd7 100644
--- a/superset-frontend/package-lock.json
+++ b/superset-frontend/package-lock.json
@@ -50863,7 +50863,7 @@
       "version": "0.20.3",
       "license": "Apache-2.0",
       "dependencies": {
-        "@types/react-redux": "^7.1.10",
+        "@types/react-redux": "^7.1.34",
         "d3-array": "^1.2.0",
         "dayjs": "^1.11.13",
         "lodash": "^4.17.21"
diff --git 
a/superset-frontend/src/SqlLab/components/SqlEditorTabHeader/index.tsx 
b/superset-frontend/src/SqlLab/components/SqlEditorTabHeader/index.tsx
index a7be15f8dd..fa01b60f42 100644
--- a/superset-frontend/src/SqlLab/components/SqlEditorTabHeader/index.tsx
+++ b/superset-frontend/src/SqlLab/components/SqlEditorTabHeader/index.tsx
@@ -212,12 +212,12 @@ const SqlEditorTabHeader: FC<Props> = ({ queryEditor }) 
=> {
           </Menu>
         }
       />
-      <TabTitle>{qe.name}</TabTitle>
+      <TabTitle>{qe.name}</TabTitle>{' '}
       <StatusIcon
         className="status-icon"
         iconSize="xs"
         iconColor={getStatusColor(queryState, theme)}
-      />
+      />{' '}
     </TabTitleWrapper>
   );
 };
diff --git a/superset/commands/dataset/update.py 
b/superset/commands/dataset/update.py
index 2772cc0ffa..7f6134d20a 100644
--- a/superset/commands/dataset/update.py
+++ b/superset/commands/dataset/update.py
@@ -17,13 +17,13 @@
 import logging
 from collections import Counter
 from functools import partial
-from typing import Any, Optional
+from typing import Any, cast, Optional
 
 from flask_appbuilder.models.sqla import Model
 from marshmallow import ValidationError
 from sqlalchemy.exc import SQLAlchemyError
 
-from superset import security_manager
+from superset import is_feature_enabled, security_manager
 from superset.commands.base import BaseCommand, UpdateMixin
 from superset.commands.dataset.exceptions import (
     DatabaseChangeValidationError,
@@ -39,8 +39,9 @@ from superset.commands.dataset.exceptions import (
     DatasetNotFoundError,
     DatasetUpdateFailedError,
 )
-from superset.connectors.sqla.models import SqlaTable
+from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
 from superset.daos.dataset import DatasetDAO
+from superset.datasets.schemas import FolderSchema
 from superset.exceptions import SupersetSecurityException
 from superset.sql_parse import Table
 from superset.utils.decorators import on_error, transaction
@@ -127,16 +128,30 @@ class UpdateDatasetCommand(UpdateMixin, BaseCommand):
         except ValidationError as ex:
             exceptions.append(ex)
 
-        # Validate columns
+        self._validate_semantics(exceptions)
+
+        if exceptions:
+            raise DatasetInvalidError(exceptions=exceptions)
+
+    def _validate_semantics(self, exceptions: list[ValidationError]) -> None:
+        # we know we have a valid model
+        self._model = cast(SqlaTable, self._model)
+
         if columns := self._properties.get("columns"):
             self._validate_columns(columns, exceptions)
 
-        # Validate metrics
         if metrics := self._properties.get("metrics"):
             self._validate_metrics(metrics, exceptions)
 
-        if exceptions:
-            raise DatasetInvalidError(exceptions=exceptions)
+        if folders := self._properties.get("folders"):
+            try:
+                validate_folders(folders, self._model.metrics, 
self._model.columns)
+            except ValidationError as ex:
+                exceptions.append(ex)
+
+            # dump schema to convert UUID to string
+            schema = FolderSchema(many=True)
+            self._properties["folders"] = schema.dump(folders)
 
     def _validate_columns(
         self, columns: list[dict[str, Any]], exceptions: list[ValidationError]
@@ -189,3 +204,57 @@ class UpdateDatasetCommand(UpdateMixin, BaseCommand):
             if count > 1
         ]
         return duplicates
+
+
+def validate_folders(  # noqa: C901
+    folders: list[FolderSchema],
+    metrics: list[SqlMetric],
+    columns: list[TableColumn],
+) -> None:
+    """
+    Additional folder validation.
+
+    The marshmallow schema will validate the folder structure, but we still 
need to
+    check that UUIDs are valid, names are unique and not reserved, and that 
there are
+    no cycles.
+    """
+    if not is_feature_enabled("DATASET_FOLDERS"):
+        raise ValidationError("Dataset folders are not enabled")
+
+    existing = {
+        *[metric.uuid for metric in metrics],
+        *[column.uuid for column in columns],
+    }
+
+    queue: list[tuple[FolderSchema, list[str]]] = [(folder, []) for folder in 
folders]
+    seen_uuids = set()
+    seen_fqns = set()  # fully qualified folder names
+    while queue:
+        obj, path = queue.pop(0)
+        uuid, name = obj["uuid"], obj.get("name")
+
+        if uuid in path:
+            raise ValidationError(f"Cycle detected: {uuid} appears in its 
ancestry")
+
+        if uuid in seen_uuids:
+            raise ValidationError(f"Duplicate UUID in folder structure: 
{uuid}")
+        seen_uuids.add(uuid)
+
+        # folders can have duplicate name as long as they're not siblings
+        if name:
+            fqn = tuple(path + [name])
+            if name and fqn in seen_fqns:
+                raise ValidationError(f"Duplicate folder name: {name}")
+            seen_fqns.add(fqn)
+
+            if name.lower() in {"metrics", "columns"}:
+                raise ValidationError(f"Folder cannot have name '{name}'")
+
+        # check if metric/column UUID exists
+        elif not name and uuid not in existing:
+            raise ValidationError(f"Invalid UUID: {uuid}")
+
+        # traverse children
+        if children := obj.get("children"):
+            path.append(uuid)
+            queue.extend((folder, path) for folder in children)
diff --git a/superset/config.py b/superset/config.py
index 78d6567a6a..cd5393569e 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -561,6 +561,9 @@ DEFAULT_FEATURE_FLAGS: dict[str, bool] = {
     "SLACK_ENABLE_AVATARS": False,
     # Allow users to optionally specify date formats in email subjects, which 
will be parsed if enabled. # noqa: E501
     "DATE_FORMAT_IN_EMAIL_SUBJECT": False,
+    # Allow metrics and columns to be grouped into (potentially nested) 
folders in the
+    # chart builder
+    "DATASET_FOLDERS": False,
 }
 
 # ------------------------------
diff --git a/superset/connectors/sqla/models.py 
b/superset/connectors/sqla/models.py
index 4a006cd9f3..d74b2ef5c0 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -69,6 +69,7 @@ from sqlalchemy.sql import column, ColumnElement, 
literal_column, table
 from sqlalchemy.sql.elements import ColumnClause, TextClause
 from sqlalchemy.sql.expression import Label
 from sqlalchemy.sql.selectable import Alias, TableClause
+from sqlalchemy.types import JSON
 
 from superset import app, db, is_feature_enabled, security_manager
 from superset.commands.dataset.exceptions import DatasetNotFoundError
@@ -400,6 +401,7 @@ class BaseDatasource(AuditMixinNullable, 
ImportExportMixin):  # pylint: disable=
             # one to many
             "columns": [o.data for o in self.columns],
             "metrics": [o.data for o in self.metrics],
+            "folders": self.folders,
             # TODO deprecate, move logic to JS
             "order_by_choices": self.order_by_choices,
             "owners": [owner.id for owner in self.owners],
@@ -1019,6 +1021,7 @@ class TableColumn(AuditMixinNullable, ImportExportMixin, 
CertificationMixin, Mod
             "filterable",
             "groupby",
             "id",
+            "uuid",
             "is_certified",
             "is_dttm",
             "python_date_format",
@@ -1066,7 +1069,7 @@ class SqlMetric(AuditMixinNullable, ImportExportMixin, 
CertificationMixin, Model
         "extra",
         "warning_text",
     ]
-    update_from_object_fields = list(s for s in export_fields if s != 
"table_id")  # noqa: C400
+    update_from_object_fields = [s for s in export_fields if s != "table_id"]
     export_parent = "table"
 
     def __repr__(self) -> str:
@@ -1118,6 +1121,7 @@ class SqlMetric(AuditMixinNullable, ImportExportMixin, 
CertificationMixin, Model
             "description",
             "expression",
             "id",
+            "uuid",
             "is_certified",
             "metric_name",
             "warning_markdown",
@@ -1194,6 +1198,7 @@ class SqlaTable(
     extra = Column(Text)
     normalize_columns = Column(Boolean, default=False)
     always_filter_main_dttm = Column(Boolean, default=False)
+    folders = Column(JSON, nullable=True)
 
     baselink = "tablemodelview"
 
@@ -1215,6 +1220,7 @@ class SqlaTable(
         "extra",
         "normalize_columns",
         "always_filter_main_dttm",
+        "folders",
     ]
     update_from_object_fields = [f for f in export_fields if f != 
"database_id"]
     export_parent = "database"
diff --git a/superset/datasets/api.py b/superset/datasets/api.py
index 2cb3dc3f1b..85cc075667 100644
--- a/superset/datasets/api.py
+++ b/superset/datasets/api.py
@@ -35,7 +35,7 @@ from flask_babel import ngettext
 from jinja2.exceptions import TemplateSyntaxError
 from marshmallow import ValidationError
 
-from superset import event_logger
+from superset import event_logger, is_feature_enabled
 from superset.commands.dataset.create import CreateDatasetCommand
 from superset.commands.dataset.delete import DeleteDatasetCommand
 from superset.commands.dataset.duplicate import DuplicateDatasetCommand
@@ -194,8 +194,10 @@ class DatasetRestApi(BaseSupersetModelRestApi):
         "metrics.id",
         "metrics.metric_name",
         "metrics.metric_type",
+        "metrics.uuid",
         "metrics.verbose_name",
         "metrics.warning_text",
+        "folders",
         "datasource_type",
         "url",
         "extra",
@@ -621,9 +623,11 @@ class DatasetRestApi(BaseSupersetModelRestApi):
             return self.response(201, id=new_model.id, result=item)
         except DatasetInvalidError as ex:
             return self.response_422(
-                message=ex.normalized_messages()
-                if isinstance(ex, ValidationError)
-                else str(ex)
+                message=(
+                    ex.normalized_messages()
+                    if isinstance(ex, ValidationError)
+                    else str(ex)
+                )
             )
         except DatasetCreateFailedError as ex:
             logger.error(
@@ -1152,6 +1156,14 @@ class DatasetRestApi(BaseSupersetModelRestApi):
         response["id"] = pk
         response[API_RESULT_RES_KEY] = show_model_schema.dump(item, many=False)
 
+        # remove folders from resposne if `DATASET_FOLDERS` is disabled, so 
that it's
+        # possible to inspect if the feature is supported or not
+        if (
+            not is_feature_enabled("DATASET_FOLDERS")
+            and "folders" in response[API_RESULT_RES_KEY]
+        ):
+            del response[API_RESULT_RES_KEY]["folders"]
+
         if parse_boolean_string(request.args.get("include_rendered_sql")):
             try:
                 processor = get_template_processor(database=item.database)
@@ -1176,14 +1188,16 @@ class DatasetRestApi(BaseSupersetModelRestApi):
 
         def render_item_list(item_list: list[dict[str, Any]]) -> 
list[dict[str, Any]]:
             return [
-                {
-                    **item,
-                    "rendered_expression": processor.process_template(
-                        item["expression"]
-                    ),
-                }
-                if item.get("expression")
-                else item
+                (
+                    {
+                        **item,
+                        "rendered_expression": processor.process_template(
+                            item["expression"]
+                        ),
+                    }
+                    if item.get("expression")
+                    else item
+                )
                 for item in item_list
             ]
 
diff --git a/superset/datasets/schemas.py b/superset/datasets/schemas.py
index 1d271d3dae..c6e13325dd 100644
--- a/superset/datasets/schemas.py
+++ b/superset/datasets/schemas.py
@@ -19,8 +19,8 @@ from typing import Any
 
 from dateutil.parser import isoparse
 from flask_babel import lazy_gettext as _
-from marshmallow import fields, pre_load, Schema, ValidationError
-from marshmallow.validate import Length
+from marshmallow import fields, pre_load, Schema, validates_schema, 
ValidationError
+from marshmallow.validate import Length, OneOf
 
 from superset.exceptions import SupersetMarshmallowValidationError
 from superset.utils import json
@@ -88,6 +88,36 @@ class DatasetMetricsPutSchema(Schema):
     uuid = fields.UUID(allow_none=True)
 
 
+class FolderSchema(Schema):
+    uuid = fields.UUID(required=True)
+    type = fields.String(
+        required=False,
+        validate=OneOf(["metric", "column", "folder"]),
+    )
+    name = fields.String(required=False, validate=Length(1, 250))
+    description = fields.String(
+        required=False,
+        allow_none=True,
+        validate=Length(0, 1000),
+    )
+    # folder can contain metrics, columns, and subfolders:
+    children = fields.List(
+        fields.Nested(lambda: FolderSchema()),
+        required=False,
+        allow_none=True,
+    )
+
+    @validates_schema
+    def validate_folder(self, data: dict[str, Any], **kwargs: Any) -> None:
+        if "uuid" in data and len(data) == 1:
+            # only UUID is present, this is a metric or column
+            return
+
+        # folder; must have children
+        if "name" in data and "children" not in data:
+            raise ValidationError("If 'name' is present, 'children' must be 
present.")
+
+
 class DatasetPostSchema(Schema):
     database = fields.Integer(required=True)
     catalog = fields.String(allow_none=True, validate=Length(0, 250))
@@ -121,6 +151,7 @@ class DatasetPutSchema(Schema):
     owners = fields.List(fields.Integer())
     columns = fields.List(fields.Nested(DatasetColumnsPutSchema))
     metrics = fields.List(fields.Nested(DatasetMetricsPutSchema))
+    folders = fields.List(fields.Nested(FolderSchema), required=False)
     extra = fields.String(allow_none=True)
     is_managed_externally = fields.Boolean(allow_none=True, dump_default=False)
     external_url = fields.String(allow_none=True)
diff --git a/tests/integration_tests/charts/api_tests.py 
b/tests/integration_tests/charts/api_tests.py
index d4e8f01993..f776f0e632 100644
--- a/tests/integration_tests/charts/api_tests.py
+++ b/tests/integration_tests/charts/api_tests.py
@@ -2132,9 +2132,9 @@ class TestChartApi(ApiOwnersTestCaseMixin, 
InsertChartMixin, SupersetTestCase):
         new_tag = db.session.query(Tag).filter(Tag.name == "second_tag").one()
 
         # get existing tag and add a new one
-        new_tags = [tag.id for tag in chart.tags if tag.type == TagType.custom]
-        new_tags.append(new_tag.id)
-        update_payload = {"tags": new_tags}
+        new_tags = {tag.id for tag in chart.tags if tag.type == TagType.custom}
+        new_tags.add(new_tag.id)
+        update_payload = {"tags": list(new_tags)}
 
         uri = f"api/v1/chart/{chart.id}"
         rv = self.put_assert_metric(uri, update_payload, "put")
@@ -2142,7 +2142,7 @@ class TestChartApi(ApiOwnersTestCaseMixin, 
InsertChartMixin, SupersetTestCase):
         model = db.session.query(Slice).get(chart.id)
 
         # Clean up system tags
-        tag_list = [tag.id for tag in model.tags if tag.type == TagType.custom]
+        tag_list = {tag.id for tag in model.tags if tag.type == TagType.custom}
         assert tag_list == new_tags
 
     @pytest.mark.usefixtures("create_chart_with_tag")
@@ -2191,9 +2191,9 @@ class TestChartApi(ApiOwnersTestCaseMixin, 
InsertChartMixin, SupersetTestCase):
         new_tag = db.session.query(Tag).filter(Tag.name == "second_tag").one()
 
         # get existing tag and add a new one
-        new_tags = [tag.id for tag in chart.tags if tag.type == TagType.custom]
-        new_tags.append(new_tag.id)
-        update_payload = {"tags": new_tags}
+        new_tags = {tag.id for tag in chart.tags if tag.type == TagType.custom}
+        new_tags.add(new_tag.id)
+        update_payload = {"tags": list(new_tags)}
 
         uri = f"api/v1/chart/{chart.id}"
         rv = self.put_assert_metric(uri, update_payload, "put")
@@ -2201,7 +2201,7 @@ class TestChartApi(ApiOwnersTestCaseMixin, 
InsertChartMixin, SupersetTestCase):
         model = db.session.query(Slice).get(chart.id)
 
         # Clean up system tags
-        tag_list = [tag.id for tag in model.tags if tag.type == TagType.custom]
+        tag_list = {tag.id for tag in model.tags if tag.type == TagType.custom}
         assert tag_list == new_tags
 
         security_manager.add_permission_role(alpha_role, write_tags_perm)
diff --git a/tests/integration_tests/datasets/api_tests.py 
b/tests/integration_tests/datasets/api_tests.py
index 2064239140..9990110bef 100644
--- a/tests/integration_tests/datasets/api_tests.py
+++ b/tests/integration_tests/datasets/api_tests.py
@@ -255,7 +255,7 @@ class TestDatasetApi(SupersetTestCase):
             "table_name",
             "uuid",
         ]
-        assert sorted(list(response["result"][0].keys())) == expected_columns  
# noqa: C414
+        assert sorted(response["result"][0]) == expected_columns
 
     def test_get_dataset_list_gamma(self):
         """
@@ -1563,6 +1563,89 @@ class TestDatasetApi(SupersetTestCase):
         db.session.delete(dataset)
         db.session.commit()
 
+    @with_feature_flags(DATASET_FOLDERS=True)
+    def test_update_dataset_add_folders(self):
+        """
+        Dataset API: Test adding folders to dataset
+        """
+        self.login(username="admin")
+
+        dataset = self.insert_default_dataset()
+        dataset_data = {
+            "folders": [
+                {
+                    "type": "folder",
+                    "uuid": "b49ac3dd-c79b-42a4-9082-39ee74f3b369",
+                    "name": "My metrics",
+                    "children": [
+                        {
+                            "type": "metric",
+                            "uuid": str(dataset.metrics[0].uuid),
+                        },
+                    ],
+                },
+                {
+                    "type": "folder",
+                    "uuid": "f5db85fa-75d6-45e5-bdce-c6194db80642",
+                    "name": "My columns",
+                    "children": [
+                        {
+                            "type": "folder",
+                            "uuid": "b5330233-e323-4157-b767-98b16f00ca93",
+                            "name": "Dimensions",
+                            "children": [
+                                {
+                                    "type": "column",
+                                    "uuid": str(dataset.columns[1].uuid),
+                                },
+                            ],
+                        },
+                    ],
+                },
+            ]
+        }
+
+        uri = f"api/v1/dataset/{dataset.id}"
+        rv = self.put_assert_metric(uri, dataset_data, "put")
+        print(rv.data.decode("utf-8"))
+        assert rv.status_code == 200
+
+        model = db.session.query(SqlaTable).get(dataset.id)
+        assert model.folders == [
+            {
+                "uuid": "b49ac3dd-c79b-42a4-9082-39ee74f3b369",
+                "type": "folder",
+                "name": "My metrics",
+                "children": [
+                    {
+                        "uuid": str(dataset.metrics[0].uuid),
+                        "type": "metric",
+                    }
+                ],
+            },
+            {
+                "uuid": "f5db85fa-75d6-45e5-bdce-c6194db80642",
+                "type": "folder",
+                "name": "My columns",
+                "children": [
+                    {
+                        "uuid": "b5330233-e323-4157-b767-98b16f00ca93",
+                        "type": "folder",
+                        "name": "Dimensions",
+                        "children": [
+                            {
+                                "uuid": str(dataset.columns[1].uuid),
+                                "type": "column",
+                            }
+                        ],
+                    }
+                ],
+            },
+        ]
+
+        db.session.delete(dataset)
+        db.session.commit()
+
     def test_delete_dataset_item(self):
         """
         Dataset API: Test delete dataset item
@@ -1956,7 +2039,7 @@ class TestDatasetApi(SupersetTestCase):
     @pytest.mark.usefixtures("create_datasets")
     def test_export_dataset_gamma(self):
         """
-        Dataset API: Test export dataset has gamma
+        Dataset API: Test export dataset as gamma
         """
 
         dataset = self.get_fixture_datasets()[0]
@@ -1966,7 +2049,7 @@ class TestDatasetApi(SupersetTestCase):
 
         self.login(GAMMA_USERNAME)
         rv = self.client.get(uri)
-        assert rv.status_code == 403
+        assert rv.status_code in (403, 404)
 
         perm1 = security_manager.find_permission_view_menu("can_export", 
"Dataset")
 
@@ -2016,7 +2099,7 @@ class TestDatasetApi(SupersetTestCase):
         self.login(ADMIN_USERNAME)
         rv = self.get_assert_metric(uri, "export")
 
-        assert rv.status_code == 404
+        assert rv.status_code in (403, 404)
 
     @pytest.mark.usefixtures("create_datasets")
     def test_export_dataset_bundle_gamma(self):
@@ -2032,7 +2115,7 @@ class TestDatasetApi(SupersetTestCase):
         self.login(GAMMA_USERNAME)
         rv = self.client.get(uri)
         # gamma users by default do not have access to this dataset
-        assert rv.status_code == 403
+        assert rv.status_code in (403, 404)
 
     @unittest.skip("Number of related objects depend on DB")
     @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
diff --git a/tests/integration_tests/datasets/commands_tests.py 
b/tests/integration_tests/datasets/commands_tests.py
index 2c0a9ce618..fb802bd1bf 100644
--- a/tests/integration_tests/datasets/commands_tests.py
+++ b/tests/integration_tests/datasets/commands_tests.py
@@ -171,6 +171,7 @@ class TestExportDatasetsCommand(SupersetTestCase):
                     "warning_text": None,
                 },
             ],
+            "folders": None,
             "normalize_columns": False,
             "always_filter_main_dttm": False,
             "offset": 0,
@@ -235,6 +236,7 @@ class TestExportDatasetsCommand(SupersetTestCase):
             "extra",
             "normalize_columns",
             "always_filter_main_dttm",
+            "folders",
             "uuid",
             "metrics",
             "columns",
diff --git a/tests/unit_tests/commands/dataset/test_update.py 
b/tests/unit_tests/commands/dataset/test_update.py
index 9e99edf204..fa2026b533 100644
--- a/tests/unit_tests/commands/dataset/test_update.py
+++ b/tests/unit_tests/commands/dataset/test_update.py
@@ -14,47 +14,384 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+
+from typing import cast
 from unittest.mock import MagicMock
 
 import pytest
+from marshmallow import ValidationError
 from pytest_mock import MockerFixture
 
 from superset import db
 from superset.commands.dataset.exceptions import DatasetInvalidError
-from superset.commands.dataset.update import UpdateDatasetCommand
+from superset.commands.dataset.update import UpdateDatasetCommand, 
validate_folders
 from superset.connectors.sqla.models import SqlaTable
+from superset.datasets.schemas import FolderSchema
 from superset.models.core import Database
+from tests.unit_tests.conftest import with_feature_flags
 
 
 @pytest.mark.usefixture("session")
 def test_update_uniqueness_error(mocker: MockerFixture) -> None:
+    """
+    Test uniqueness validation in dataset update command.
+    """
     SqlaTable.metadata.create_all(db.session.get_bind())
-    database = Database(database_name="my_db", sqlalchemy_uri="sqlite://")
-    bar = SqlaTable(table_name="bar", schema="foo", database=database)
-    baz = SqlaTable(table_name="baz", schema="qux", database=database)
-    db.session.add_all([database, bar, baz])
-    db.session.commit()
 
-    mock_g = mocker.patch("superset.security.manager.g")
-    mock_g.user = MagicMock()
+    # First, make sure session is clean
+    db.session.rollback()
+
+    try:
+        # Set up test data
+        database = Database(database_name="my_db", sqlalchemy_uri="sqlite://")
+        bar = SqlaTable(table_name="bar", schema="foo", database=database)
+        baz = SqlaTable(table_name="baz", schema="qux", database=database)
+        db.session.add_all([database, bar, baz])
+        db.session.commit()
+
+        # Set up mocks
+        mock_g = mocker.patch("superset.security.manager.g")
+        mock_g.user = MagicMock()
+        mocker.patch(
+            "superset.views.base.security_manager.can_access_all_datasources",
+            return_value=True,
+        )
+        mocker.patch(
+            
"superset.commands.dataset.update.security_manager.raise_for_ownership",
+            return_value=None,
+        )
+        mocker.patch.object(UpdateDatasetCommand, "compute_owners", 
return_value=[])
+
+        # Run the test that should fail
+        with pytest.raises(DatasetInvalidError):
+            UpdateDatasetCommand(
+                bar.id,
+                {
+                    "table_name": "baz",
+                    "schema": "qux",
+                },
+            ).run()
+    except Exception:
+        db.session.rollback()
+        raise
+    finally:
+        # Clean up - this will run even if the test fails
+        try:
+            db.session.query(SqlaTable).filter(
+                SqlaTable.table_name.in_(["bar", "baz"]),
+                SqlaTable.schema.in_(["foo", "qux"]),
+            ).delete(synchronize_session=False)
+            db.session.query(Database).filter(Database.database_name == 
"my_db").delete(
+                synchronize_session=False
+            )
+            db.session.commit()
+        except Exception:
+            db.session.rollback()
 
-    mocker.patch(
-        "superset.views.base.security_manager.can_access_all_datasources",
-        return_value=True,
+
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders(mocker: MockerFixture) -> None:
+    """
+    Test the folder validation.
+    """
+    metrics = [mocker.MagicMock(metric_name="metric1", uuid="uuid1")]
+    columns = [
+        mocker.MagicMock(column_name="column1", uuid="uuid2"),
+        mocker.MagicMock(column_name="column2", uuid="uuid3"),
+    ]
+
+    validate_folders(folders=[], metrics=metrics, columns=columns)
+
+    folders = cast(
+        list[FolderSchema],
+        [
+            {
+                "uuid": "uuid4",
+                "type": "folder",
+                "name": "My folder",
+                "children": [
+                    {
+                        "uuid": "uuid1",
+                        "type": "metric",
+                        "name": "metric1",
+                    },
+                    {
+                        "uuid": "uuid2",
+                        "type": "column",
+                        "name": "column1",
+                    },
+                    {
+                        "uuid": "uuid3",
+                        "type": "column",
+                        "name": "column2",
+                    },
+                ],
+            },
+        ],
     )
+    validate_folders(folders=folders, metrics=metrics, columns=columns)
+
 
-    mocker.patch(
-        
"superset.commands.dataset.update.security_manager.raise_for_ownership",
-        return_value=None,
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_cycle(mocker: MockerFixture) -> None:
+    """
+    Test that we can detect cycles in the folder structure.
+    """
+    folders = cast(
+        list[FolderSchema],
+        [
+            {
+                "uuid": "uuid1",
+                "type": "folder",
+                "name": "My folder",
+                "children": [
+                    {
+                        "uuid": "uuid2",
+                        "type": "folder",
+                        "name": "My other folder",
+                        "children": [
+                            {
+                                "uuid": "uuid1",
+                                "type": "folder",
+                                "name": "My folder",
+                                "children": [],
+                            },
+                        ],
+                    },
+                ],
+            },
+        ],
     )
 
-    mocker.patch.object(UpdateDatasetCommand, "compute_owners", 
return_value=[])
+    with pytest.raises(ValidationError) as excinfo:
+        validate_folders(folders=folders, metrics=[], columns=[])
+    assert str(excinfo.value) == "Cycle detected: uuid1 appears in its 
ancestry"
+
+
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_inter_cycle(mocker: MockerFixture) -> None:
+    """
+    Test that we can detect cycles between folders.
+    """
+    folders = cast(
+        list[FolderSchema],
+        [
+            {
+                "uuid": "uuid1",
+                "type": "folder",
+                "name": "My folder",
+                "children": [
+                    {
+                        "uuid": "uuid2",
+                        "type": "folder",
+                        "name": "My other folder",
+                        "children": [],
+                    },
+                ],
+            },
+            {
+                "uuid": "uuid2",
+                "type": "folder",
+                "name": "My other folder",
+                "children": [
+                    {
+                        "uuid": "uuid1",
+                        "type": "folder",
+                        "name": "My folder",
+                        "children": [],
+                    },
+                ],
+            },
+        ],
+    )
+
+    with pytest.raises(ValidationError) as excinfo:
+        validate_folders(folders=folders, metrics=[], columns=[])
+    assert str(excinfo.value) == "Duplicate UUID in folder structure: uuid2"
+
+
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_duplicates(mocker: MockerFixture) -> None:
+    """
+    Test that metrics and columns belong to a single folder.
+    """
+    metrics = [mocker.MagicMock(metric_name="count", uuid="uuid2")]
+    folders = cast(
+        list[FolderSchema],
+        [
+            {
+                "uuid": "uuid1",
+                "type": "folder",
+                "name": "My folder",
+                "children": [
+                    {
+                        "uuid": "uuid2",
+                        "type": "metric",
+                        "name": "count",
+                    },
+                ],
+            },
+            {
+                "uuid": "uuid2",
+                "type": "folder",
+                "name": "My other folder",
+                "children": [
+                    {
+                        "uuid": "uuid2",
+                        "type": "metric",
+                        "name": "count",
+                    },
+                ],
+            },
+        ],
+    )
+
+    with pytest.raises(ValidationError) as excinfo:
+        validate_folders(folders=folders, metrics=metrics, columns=[])
+    assert str(excinfo.value) == "Duplicate UUID in folder structure: uuid2"
+
 
-    with pytest.raises(DatasetInvalidError):
-        UpdateDatasetCommand(
-            bar.id,
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_duplicate_name_not_siblings(mocker: MockerFixture) 
-> None:
+    """
+    Duplicate folder names are allowed if folders are not siblings.
+    """
+    folders = cast(
+        list[FolderSchema],
+        [
             {
-                "table_name": "baz",
-                "schema": "qux",
+                "uuid": "uuid1",
+                "type": "folder",
+                "name": "Sales",
+                "children": [
+                    {
+                        "uuid": "uuid2",
+                        "type": "folder",
+                        "name": "Core",
+                        "children": [],
+                    },
+                ],
             },
-        ).run()
+            {
+                "uuid": "uuid3",
+                "type": "folder",
+                "name": "Engineering",
+                "children": [
+                    {
+                        "uuid": "uuid4",
+                        "type": "folder",
+                        "name": "Core",
+                        "children": [],
+                    },
+                ],
+            },
+        ],
+    )
+
+    validate_folders(folders=folders, metrics=[], columns=[])
+
+
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_duplicate_name_siblings(mocker: MockerFixture) -> 
None:
+    """
+    Duplicate folder names are not allowed if folders are siblings.
+    """
+    folders = cast(
+        list[FolderSchema],
+        [
+            {
+                "uuid": "uuid1",
+                "type": "folder",
+                "name": "Sales",
+                "children": [
+                    {
+                        "uuid": "uuid2",
+                        "type": "folder",
+                        "name": "Core",
+                        "children": [],
+                    },
+                ],
+            },
+            {
+                "uuid": "uuid3",
+                "type": "folder",
+                "name": "Sales",
+                "children": [
+                    {
+                        "uuid": "uuid4",
+                        "type": "folder",
+                        "name": "Other",
+                        "children": [],
+                    },
+                ],
+            },
+        ],
+    )
+
+    with pytest.raises(ValidationError) as excinfo:
+        validate_folders(folders=folders, metrics=[], columns=[])
+    assert str(excinfo.value) == "Duplicate folder name: Sales"
+
+
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_invalid_names(mocker: MockerFixture) -> None:
+    """
+    Test that we can detect reserved folder names.
+    """
+    folders_with_metrics = cast(
+        list[FolderSchema],
+        [
+            {
+                "uuid": "uuid1",
+                "type": "folder",
+                "name": "Metrics",
+                "children": [],
+            },
+        ],
+    )
+    folders_with_columns = cast(
+        list[FolderSchema],
+        [
+            {
+                "uuid": "uuid1",
+                "type": "folder",
+                "name": "Columns",
+                "children": [],
+            },
+        ],
+    )
+
+    with pytest.raises(ValidationError) as excinfo:
+        validate_folders(folders=folders_with_metrics, metrics=[], columns=[])
+    assert str(excinfo.value) == "Folder cannot have name 'Metrics'"
+
+    with pytest.raises(ValidationError) as excinfo:
+        validate_folders(folders=folders_with_columns, metrics=[], columns=[])
+    assert str(excinfo.value) == "Folder cannot have name 'Columns'"
+
+
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_invalid_uuid(mocker: MockerFixture) -> None:
+    """
+    Test that we can detect invalid UUIDs.
+    """
+    folders = cast(
+        list[FolderSchema],
+        [
+            {
+                "uuid": "uuid4",
+                "type": "folder",
+                "name": "My folder",
+                "children": [
+                    {
+                        "uuid": "uuid2",
+                        "type": "metric",
+                        "name": "metric1",
+                    },
+                ],
+            },
+        ],
+    )
+
+    with pytest.raises(ValidationError):
+        FolderSchema(many=True).load(folders)
diff --git a/tests/unit_tests/datasets/commands/export_test.py 
b/tests/unit_tests/datasets/commands/export_test.py
index f42bbfc9cb..04bc989ba5 100644
--- a/tests/unit_tests/datasets/commands/export_test.py
+++ b/tests/unit_tests/datasets/commands/export_test.py
@@ -16,6 +16,8 @@
 # under the License.
 # pylint: disable=import-outside-toplevel, unused-argument, unused-import
 
+from uuid import UUID
+
 from sqlalchemy.orm.session import Session
 
 from superset import db
@@ -47,6 +49,7 @@ def test_export(session: Session) -> None:
             type="INTEGER",
             expression="revenue-expenses",
             extra=json.dumps({"certified_by": "User"}),
+            uuid=UUID("00000000-0000-0000-0000-000000000005"),
         ),
     ]
     metrics = [
@@ -54,6 +57,7 @@ def test_export(session: Session) -> None:
             metric_name="cnt",
             expression="COUNT(*)",
             extra=json.dumps({"warning_markdown": None}),
+            uuid=UUID("00000000-0000-0000-0000-000000000004"),
         ),
     ]
 
@@ -61,6 +65,46 @@ def test_export(session: Session) -> None:
         table_name="my_table",
         columns=columns,
         metrics=metrics,
+        folders=[
+            {
+                "uuid": "00000000-0000-0000-0000-000000000000",
+                "type": "folder",
+                "name": "Engineering",
+                "children": [
+                    {
+                        "uuid": "00000000-0000-0000-0000-000000000001",
+                        "type": "folder",
+                        "name": "Core",
+                        "children": [
+                            {
+                                "uuid": "00000000-0000-0000-0000-000000000004",
+                                "type": "metric",
+                                "name": "cnt",
+                            },
+                        ],
+                    },
+                ],
+            },
+            {
+                "uuid": "00000000-0000-0000-0000-000000000002",
+                "type": "folder",
+                "name": "Sales",
+                "children": [
+                    {
+                        "uuid": "00000000-0000-0000-0000-000000000003",
+                        "type": "folder",
+                        "name": "Core",
+                        "children": [
+                            {
+                                "uuid": "00000000-0000-0000-0000-000000000005",
+                                "type": "column",
+                                "name": "profit",
+                            },
+                        ],
+                    },
+                ],
+            },
+        ],
         main_dttm_col="ds",
         database=database,
         offset=-8,
@@ -126,6 +170,29 @@ extra:
   warning_markdown: '*WARNING*'
 normalize_columns: false
 always_filter_main_dttm: false
+folders:
+- uuid: 00000000-0000-0000-0000-000000000000
+  type: folder
+  name: Engineering
+  children:
+  - uuid: 00000000-0000-0000-0000-000000000001
+    type: folder
+    name: Core
+    children:
+    - uuid: 00000000-0000-0000-0000-000000000004
+      type: metric
+      name: cnt
+- uuid: 00000000-0000-0000-0000-000000000002
+  type: folder
+  name: Sales
+  children:
+  - uuid: 00000000-0000-0000-0000-000000000003
+    type: folder
+    name: Core
+    children:
+    - uuid: 00000000-0000-0000-0000-000000000005
+      type: column
+      name: profit
 uuid: {payload["uuid"]}
 metrics:
 - metric_name: cnt
diff --git a/tests/unit_tests/datasets/commands/importers/v1/import_test.py 
b/tests/unit_tests/datasets/commands/importers/v1/import_test.py
index 6bba6f039d..2d8c0ede4d 100644
--- a/tests/unit_tests/datasets/commands/importers/v1/import_test.py
+++ b/tests/unit_tests/datasets/commands/importers/v1/import_test.py
@@ -89,6 +89,7 @@ def test_import_dataset(mocker: MockerFixture, session: 
Session) -> None:
                 "d3format": None,
                 "extra": {"warning_markdown": None},
                 "warning_text": None,
+                "uuid": "00000000-0000-0000-0000-000000000001",
             }
         ],
         "columns": [
@@ -106,8 +107,49 @@ def test_import_dataset(mocker: MockerFixture, session: 
Session) -> None:
                 "extra": {
                     "certified_by": "User",
                 },
+                "uuid": "00000000-0000-0000-0000-000000000002",
             }
         ],
+        "folders": [
+            {
+                "uuid": "00000000-0000-0000-0000-000000000000",
+                "type": "folder",
+                "name": "Engineering",
+                "children": [
+                    {
+                        "uuid": "00000000-0000-0000-0000-000000000001",
+                        "type": "folder",
+                        "name": "Core",
+                        "children": [
+                            {
+                                "uuid": "00000000-0000-0000-0000-000000000004",
+                                "type": "metric",
+                                "name": "cnt",
+                            },
+                        ],
+                    },
+                ],
+            },
+            {
+                "uuid": "00000000-0000-0000-0000-000000000002",
+                "type": "folder",
+                "name": "Sales",
+                "children": [
+                    {
+                        "uuid": "00000000-0000-0000-0000-000000000003",
+                        "type": "folder",
+                        "name": "Core",
+                        "children": [
+                            {
+                                "uuid": "00000000-0000-0000-0000-000000000005",
+                                "type": "column",
+                                "name": "profit",
+                            },
+                        ],
+                    },
+                ],
+            },
+        ],
         "database_uuid": database.uuid,
         "database_id": database.id,
     }
@@ -139,6 +181,9 @@ def test_import_dataset(mocker: MockerFixture, session: 
Session) -> None:
     assert sqla_table.metrics[0].d3format is None
     assert sqla_table.metrics[0].extra == '{"warning_markdown": null}'
     assert sqla_table.metrics[0].warning_text is None
+    assert sqla_table.metrics[0].uuid == uuid.UUID(
+        "00000000-0000-0000-0000-000000000001"
+    )
     assert len(sqla_table.columns) == 1
     assert sqla_table.columns[0].column_name == "profit"
     assert sqla_table.columns[0].verbose_name is None
@@ -151,10 +196,131 @@ def test_import_dataset(mocker: MockerFixture, session: 
Session) -> None:
     assert sqla_table.columns[0].description is None
     assert sqla_table.columns[0].python_date_format is None
     assert sqla_table.columns[0].extra == '{"certified_by": "User"}'
+    assert sqla_table.columns[0].uuid == uuid.UUID(
+        "00000000-0000-0000-0000-000000000002"
+    )
+    assert sqla_table.folders == [
+        {
+            "uuid": "00000000-0000-0000-0000-000000000000",
+            "type": "folder",
+            "name": "Engineering",
+            "children": [
+                {
+                    "uuid": "00000000-0000-0000-0000-000000000001",
+                    "type": "folder",
+                    "name": "Core",
+                    "children": [
+                        {
+                            "uuid": "00000000-0000-0000-0000-000000000004",
+                            "type": "metric",
+                            "name": "cnt",
+                        },
+                    ],
+                },
+            ],
+        },
+        {
+            "uuid": "00000000-0000-0000-0000-000000000002",
+            "type": "folder",
+            "name": "Sales",
+            "children": [
+                {
+                    "uuid": "00000000-0000-0000-0000-000000000003",
+                    "type": "folder",
+                    "name": "Core",
+                    "children": [
+                        {
+                            "uuid": "00000000-0000-0000-0000-000000000005",
+                            "type": "column",
+                            "name": "profit",
+                        },
+                    ],
+                },
+            ],
+        },
+    ]
     assert sqla_table.database.uuid == database.uuid
     assert sqla_table.database.id == database.id
 
 
+def test_import_dataset_no_folder(mocker: MockerFixture, session: Session) -> 
None:
+    """
+    Test importing a dataset that was exported without folders.
+    """
+    from superset import security_manager
+    from superset.commands.dataset.importers.v1.utils import import_dataset
+    from superset.connectors.sqla.models import SqlaTable
+    from superset.models.core import Database
+
+    mocker.patch.object(security_manager, "can_access", return_value=True)
+
+    engine = db.session.get_bind()
+    SqlaTable.metadata.create_all(engine)  # pylint: disable=no-member
+
+    database = Database(database_name="my_database", 
sqlalchemy_uri="sqlite://")
+    db.session.add(database)
+    db.session.flush()
+
+    dataset_uuid = uuid.uuid4()
+    config = {
+        "table_name": "my_table",
+        "main_dttm_col": "ds",
+        "description": "This is the description",
+        "default_endpoint": None,
+        "offset": -8,
+        "cache_timeout": 3600,
+        "catalog": "public",
+        "schema": "my_schema",
+        "sql": None,
+        "params": {
+            "remote_id": 64,
+            "database_name": "examples",
+            "import_time": 1606677834,
+        },
+        "template_params": {
+            "answer": "42",
+        },
+        "filter_select_enabled": True,
+        "fetch_values_predicate": "foo IN (1, 2)",
+        "extra": {"warning_markdown": "*WARNING*"},
+        "uuid": dataset_uuid,
+        "metrics": [
+            {
+                "metric_name": "cnt",
+                "verbose_name": None,
+                "metric_type": None,
+                "expression": "COUNT(*)",
+                "description": None,
+                "d3format": None,
+                "extra": {"warning_markdown": None},
+                "warning_text": None,
+            }
+        ],
+        "columns": [
+            {
+                "column_name": "profit",
+                "verbose_name": None,
+                "is_dttm": None,
+                "is_active": None,
+                "type": "INTEGER",
+                "groupby": None,
+                "filterable": None,
+                "expression": "revenue-expenses",
+                "description": None,
+                "python_date_format": None,
+                "extra": {
+                    "certified_by": "User",
+                },
+            }
+        ],
+        "database_uuid": database.uuid,
+        "database_id": database.id,
+    }
+
+    sqla_table = import_dataset(config)
+    assert sqla_table.folders is None
+
+
 def test_import_dataset_duplicate_column(
     mocker: MockerFixture, session: Session
 ) -> None:

Reply via email to