This is an automated email from the ASF dual-hosted git repository.
elizabeth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 7ab8534ef6 feat: dataset folders (backend) (#32520)
7ab8534ef6 is described below
commit 7ab8534ef693240c55e49d124d77a5a01e17d860
Author: Beto Dealmeida <[email protected]>
AuthorDate: Fri Apr 11 14:38:08 2025 -0400
feat: dataset folders (backend) (#32520)
Co-authored-by: Maxime Beauchemin <[email protected]>
---
.github/workflows/superset-python-unittest.yml | 2 +-
superset-frontend/package-lock.json | 2 +-
.../SqlLab/components/SqlEditorTabHeader/index.tsx | 4 +-
superset/commands/dataset/update.py | 83 ++++-
superset/config.py | 3 +
superset/connectors/sqla/models.py | 8 +-
superset/datasets/api.py | 38 ++-
superset/datasets/schemas.py | 35 +-
tests/integration_tests/charts/api_tests.py | 16 +-
tests/integration_tests/datasets/api_tests.py | 93 ++++-
tests/integration_tests/datasets/commands_tests.py | 2 +
tests/unit_tests/commands/dataset/test_update.py | 379 +++++++++++++++++++--
tests/unit_tests/datasets/commands/export_test.py | 67 ++++
.../datasets/commands/importers/v1/import_test.py | 166 +++++++++
14 files changed, 838 insertions(+), 60 deletions(-)
diff --git a/.github/workflows/superset-python-unittest.yml
b/.github/workflows/superset-python-unittest.yml
index c4cef8de24..a8feed453c 100644
--- a/.github/workflows/superset-python-unittest.yml
+++ b/.github/workflows/superset-python-unittest.yml
@@ -44,7 +44,7 @@ jobs:
SUPERSET_TESTENV: true
SUPERSET_SECRET_KEY: not-a-secret
run: |
- pytest --durations-min=0.5 --cov-report= --cov=superset
./tests/common ./tests/unit_tests --cache-clear
+ pytest --durations-min=0.5 --cov-report= --cov=superset
./tests/common ./tests/unit_tests --cache-clear --maxfail=50
- name: Upload code coverage
uses: codecov/codecov-action@v5
with:
diff --git a/superset-frontend/package-lock.json
b/superset-frontend/package-lock.json
index 8c070cc2d0..3749e68dd7 100644
--- a/superset-frontend/package-lock.json
+++ b/superset-frontend/package-lock.json
@@ -50863,7 +50863,7 @@
"version": "0.20.3",
"license": "Apache-2.0",
"dependencies": {
- "@types/react-redux": "^7.1.10",
+ "@types/react-redux": "^7.1.34",
"d3-array": "^1.2.0",
"dayjs": "^1.11.13",
"lodash": "^4.17.21"
diff --git
a/superset-frontend/src/SqlLab/components/SqlEditorTabHeader/index.tsx
b/superset-frontend/src/SqlLab/components/SqlEditorTabHeader/index.tsx
index a7be15f8dd..fa01b60f42 100644
--- a/superset-frontend/src/SqlLab/components/SqlEditorTabHeader/index.tsx
+++ b/superset-frontend/src/SqlLab/components/SqlEditorTabHeader/index.tsx
@@ -212,12 +212,12 @@ const SqlEditorTabHeader: FC<Props> = ({ queryEditor })
=> {
</Menu>
}
/>
- <TabTitle>{qe.name}</TabTitle>
+ <TabTitle>{qe.name}</TabTitle>{' '}
<StatusIcon
className="status-icon"
iconSize="xs"
iconColor={getStatusColor(queryState, theme)}
- />
+ />{' '}
</TabTitleWrapper>
);
};
diff --git a/superset/commands/dataset/update.py
b/superset/commands/dataset/update.py
index 2772cc0ffa..7f6134d20a 100644
--- a/superset/commands/dataset/update.py
+++ b/superset/commands/dataset/update.py
@@ -17,13 +17,13 @@
import logging
from collections import Counter
from functools import partial
-from typing import Any, Optional
+from typing import Any, cast, Optional
from flask_appbuilder.models.sqla import Model
from marshmallow import ValidationError
from sqlalchemy.exc import SQLAlchemyError
-from superset import security_manager
+from superset import is_feature_enabled, security_manager
from superset.commands.base import BaseCommand, UpdateMixin
from superset.commands.dataset.exceptions import (
DatabaseChangeValidationError,
@@ -39,8 +39,9 @@ from superset.commands.dataset.exceptions import (
DatasetNotFoundError,
DatasetUpdateFailedError,
)
-from superset.connectors.sqla.models import SqlaTable
+from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
from superset.daos.dataset import DatasetDAO
+from superset.datasets.schemas import FolderSchema
from superset.exceptions import SupersetSecurityException
from superset.sql_parse import Table
from superset.utils.decorators import on_error, transaction
@@ -127,16 +128,30 @@ class UpdateDatasetCommand(UpdateMixin, BaseCommand):
except ValidationError as ex:
exceptions.append(ex)
- # Validate columns
+ self._validate_semantics(exceptions)
+
+ if exceptions:
+ raise DatasetInvalidError(exceptions=exceptions)
+
+ def _validate_semantics(self, exceptions: list[ValidationError]) -> None:
+ # we know we have a valid model
+ self._model = cast(SqlaTable, self._model)
+
if columns := self._properties.get("columns"):
self._validate_columns(columns, exceptions)
- # Validate metrics
if metrics := self._properties.get("metrics"):
self._validate_metrics(metrics, exceptions)
- if exceptions:
- raise DatasetInvalidError(exceptions=exceptions)
+ if folders := self._properties.get("folders"):
+ try:
+ validate_folders(folders, self._model.metrics,
self._model.columns)
+ except ValidationError as ex:
+ exceptions.append(ex)
+
+ # dump schema to convert UUID to string
+ schema = FolderSchema(many=True)
+ self._properties["folders"] = schema.dump(folders)
def _validate_columns(
self, columns: list[dict[str, Any]], exceptions: list[ValidationError]
@@ -189,3 +204,57 @@ class UpdateDatasetCommand(UpdateMixin, BaseCommand):
if count > 1
]
return duplicates
+
+
+def validate_folders( # noqa: C901
+ folders: list[FolderSchema],
+ metrics: list[SqlMetric],
+ columns: list[TableColumn],
+) -> None:
+ """
+ Additional folder validation.
+
+ The marshmallow schema will validate the folder structure, but we still
need to
+ check that UUIDs are valid, names are unique and not reserved, and that
there are
+ no cycles.
+ """
+ if not is_feature_enabled("DATASET_FOLDERS"):
+ raise ValidationError("Dataset folders are not enabled")
+
+ existing = {
+ *[metric.uuid for metric in metrics],
+ *[column.uuid for column in columns],
+ }
+
+ queue: list[tuple[FolderSchema, list[str]]] = [(folder, []) for folder in
folders]
+ seen_uuids = set()
+ seen_fqns = set() # fully qualified folder names
+ while queue:
+ obj, path = queue.pop(0)
+ uuid, name = obj["uuid"], obj.get("name")
+
+ if uuid in path:
+ raise ValidationError(f"Cycle detected: {uuid} appears in its
ancestry")
+
+ if uuid in seen_uuids:
+ raise ValidationError(f"Duplicate UUID in folder structure:
{uuid}")
+ seen_uuids.add(uuid)
+
+ # folders can have duplicate name as long as they're not siblings
+ if name:
+ fqn = tuple(path + [name])
+ if name and fqn in seen_fqns:
+ raise ValidationError(f"Duplicate folder name: {name}")
+ seen_fqns.add(fqn)
+
+ if name.lower() in {"metrics", "columns"}:
+ raise ValidationError(f"Folder cannot have name '{name}'")
+
+ # check if metric/column UUID exists
+ elif not name and uuid not in existing:
+ raise ValidationError(f"Invalid UUID: {uuid}")
+
+ # traverse children
+ if children := obj.get("children"):
+ path.append(uuid)
+ queue.extend((folder, path) for folder in children)
diff --git a/superset/config.py b/superset/config.py
index 78d6567a6a..cd5393569e 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -561,6 +561,9 @@ DEFAULT_FEATURE_FLAGS: dict[str, bool] = {
"SLACK_ENABLE_AVATARS": False,
# Allow users to optionally specify date formats in email subjects, which
will be parsed if enabled. # noqa: E501
"DATE_FORMAT_IN_EMAIL_SUBJECT": False,
+ # Allow metrics and columns to be grouped into (potentially nested)
folders in the
+ # chart builder
+ "DATASET_FOLDERS": False,
}
# ------------------------------
diff --git a/superset/connectors/sqla/models.py
b/superset/connectors/sqla/models.py
index 4a006cd9f3..d74b2ef5c0 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -69,6 +69,7 @@ from sqlalchemy.sql import column, ColumnElement,
literal_column, table
from sqlalchemy.sql.elements import ColumnClause, TextClause
from sqlalchemy.sql.expression import Label
from sqlalchemy.sql.selectable import Alias, TableClause
+from sqlalchemy.types import JSON
from superset import app, db, is_feature_enabled, security_manager
from superset.commands.dataset.exceptions import DatasetNotFoundError
@@ -400,6 +401,7 @@ class BaseDatasource(AuditMixinNullable,
ImportExportMixin): # pylint: disable=
# one to many
"columns": [o.data for o in self.columns],
"metrics": [o.data for o in self.metrics],
+ "folders": self.folders,
# TODO deprecate, move logic to JS
"order_by_choices": self.order_by_choices,
"owners": [owner.id for owner in self.owners],
@@ -1019,6 +1021,7 @@ class TableColumn(AuditMixinNullable, ImportExportMixin,
CertificationMixin, Mod
"filterable",
"groupby",
"id",
+ "uuid",
"is_certified",
"is_dttm",
"python_date_format",
@@ -1066,7 +1069,7 @@ class SqlMetric(AuditMixinNullable, ImportExportMixin,
CertificationMixin, Model
"extra",
"warning_text",
]
- update_from_object_fields = list(s for s in export_fields if s !=
"table_id") # noqa: C400
+ update_from_object_fields = [s for s in export_fields if s != "table_id"]
export_parent = "table"
def __repr__(self) -> str:
@@ -1118,6 +1121,7 @@ class SqlMetric(AuditMixinNullable, ImportExportMixin,
CertificationMixin, Model
"description",
"expression",
"id",
+ "uuid",
"is_certified",
"metric_name",
"warning_markdown",
@@ -1194,6 +1198,7 @@ class SqlaTable(
extra = Column(Text)
normalize_columns = Column(Boolean, default=False)
always_filter_main_dttm = Column(Boolean, default=False)
+ folders = Column(JSON, nullable=True)
baselink = "tablemodelview"
@@ -1215,6 +1220,7 @@ class SqlaTable(
"extra",
"normalize_columns",
"always_filter_main_dttm",
+ "folders",
]
update_from_object_fields = [f for f in export_fields if f !=
"database_id"]
export_parent = "database"
diff --git a/superset/datasets/api.py b/superset/datasets/api.py
index 2cb3dc3f1b..85cc075667 100644
--- a/superset/datasets/api.py
+++ b/superset/datasets/api.py
@@ -35,7 +35,7 @@ from flask_babel import ngettext
from jinja2.exceptions import TemplateSyntaxError
from marshmallow import ValidationError
-from superset import event_logger
+from superset import event_logger, is_feature_enabled
from superset.commands.dataset.create import CreateDatasetCommand
from superset.commands.dataset.delete import DeleteDatasetCommand
from superset.commands.dataset.duplicate import DuplicateDatasetCommand
@@ -194,8 +194,10 @@ class DatasetRestApi(BaseSupersetModelRestApi):
"metrics.id",
"metrics.metric_name",
"metrics.metric_type",
+ "metrics.uuid",
"metrics.verbose_name",
"metrics.warning_text",
+ "folders",
"datasource_type",
"url",
"extra",
@@ -621,9 +623,11 @@ class DatasetRestApi(BaseSupersetModelRestApi):
return self.response(201, id=new_model.id, result=item)
except DatasetInvalidError as ex:
return self.response_422(
- message=ex.normalized_messages()
- if isinstance(ex, ValidationError)
- else str(ex)
+ message=(
+ ex.normalized_messages()
+ if isinstance(ex, ValidationError)
+ else str(ex)
+ )
)
except DatasetCreateFailedError as ex:
logger.error(
@@ -1152,6 +1156,14 @@ class DatasetRestApi(BaseSupersetModelRestApi):
response["id"] = pk
response[API_RESULT_RES_KEY] = show_model_schema.dump(item, many=False)
+ # remove folders from resposne if `DATASET_FOLDERS` is disabled, so
that it's
+ # possible to inspect if the feature is supported or not
+ if (
+ not is_feature_enabled("DATASET_FOLDERS")
+ and "folders" in response[API_RESULT_RES_KEY]
+ ):
+ del response[API_RESULT_RES_KEY]["folders"]
+
if parse_boolean_string(request.args.get("include_rendered_sql")):
try:
processor = get_template_processor(database=item.database)
@@ -1176,14 +1188,16 @@ class DatasetRestApi(BaseSupersetModelRestApi):
def render_item_list(item_list: list[dict[str, Any]]) ->
list[dict[str, Any]]:
return [
- {
- **item,
- "rendered_expression": processor.process_template(
- item["expression"]
- ),
- }
- if item.get("expression")
- else item
+ (
+ {
+ **item,
+ "rendered_expression": processor.process_template(
+ item["expression"]
+ ),
+ }
+ if item.get("expression")
+ else item
+ )
for item in item_list
]
diff --git a/superset/datasets/schemas.py b/superset/datasets/schemas.py
index 1d271d3dae..c6e13325dd 100644
--- a/superset/datasets/schemas.py
+++ b/superset/datasets/schemas.py
@@ -19,8 +19,8 @@ from typing import Any
from dateutil.parser import isoparse
from flask_babel import lazy_gettext as _
-from marshmallow import fields, pre_load, Schema, ValidationError
-from marshmallow.validate import Length
+from marshmallow import fields, pre_load, Schema, validates_schema,
ValidationError
+from marshmallow.validate import Length, OneOf
from superset.exceptions import SupersetMarshmallowValidationError
from superset.utils import json
@@ -88,6 +88,36 @@ class DatasetMetricsPutSchema(Schema):
uuid = fields.UUID(allow_none=True)
+class FolderSchema(Schema):
+ uuid = fields.UUID(required=True)
+ type = fields.String(
+ required=False,
+ validate=OneOf(["metric", "column", "folder"]),
+ )
+ name = fields.String(required=False, validate=Length(1, 250))
+ description = fields.String(
+ required=False,
+ allow_none=True,
+ validate=Length(0, 1000),
+ )
+ # folder can contain metrics, columns, and subfolders:
+ children = fields.List(
+ fields.Nested(lambda: FolderSchema()),
+ required=False,
+ allow_none=True,
+ )
+
+ @validates_schema
+ def validate_folder(self, data: dict[str, Any], **kwargs: Any) -> None:
+ if "uuid" in data and len(data) == 1:
+ # only UUID is present, this is a metric or column
+ return
+
+ # folder; must have children
+ if "name" in data and "children" not in data:
+ raise ValidationError("If 'name' is present, 'children' must be
present.")
+
+
class DatasetPostSchema(Schema):
database = fields.Integer(required=True)
catalog = fields.String(allow_none=True, validate=Length(0, 250))
@@ -121,6 +151,7 @@ class DatasetPutSchema(Schema):
owners = fields.List(fields.Integer())
columns = fields.List(fields.Nested(DatasetColumnsPutSchema))
metrics = fields.List(fields.Nested(DatasetMetricsPutSchema))
+ folders = fields.List(fields.Nested(FolderSchema), required=False)
extra = fields.String(allow_none=True)
is_managed_externally = fields.Boolean(allow_none=True, dump_default=False)
external_url = fields.String(allow_none=True)
diff --git a/tests/integration_tests/charts/api_tests.py
b/tests/integration_tests/charts/api_tests.py
index d4e8f01993..f776f0e632 100644
--- a/tests/integration_tests/charts/api_tests.py
+++ b/tests/integration_tests/charts/api_tests.py
@@ -2132,9 +2132,9 @@ class TestChartApi(ApiOwnersTestCaseMixin,
InsertChartMixin, SupersetTestCase):
new_tag = db.session.query(Tag).filter(Tag.name == "second_tag").one()
# get existing tag and add a new one
- new_tags = [tag.id for tag in chart.tags if tag.type == TagType.custom]
- new_tags.append(new_tag.id)
- update_payload = {"tags": new_tags}
+ new_tags = {tag.id for tag in chart.tags if tag.type == TagType.custom}
+ new_tags.add(new_tag.id)
+ update_payload = {"tags": list(new_tags)}
uri = f"api/v1/chart/{chart.id}"
rv = self.put_assert_metric(uri, update_payload, "put")
@@ -2142,7 +2142,7 @@ class TestChartApi(ApiOwnersTestCaseMixin,
InsertChartMixin, SupersetTestCase):
model = db.session.query(Slice).get(chart.id)
# Clean up system tags
- tag_list = [tag.id for tag in model.tags if tag.type == TagType.custom]
+ tag_list = {tag.id for tag in model.tags if tag.type == TagType.custom}
assert tag_list == new_tags
@pytest.mark.usefixtures("create_chart_with_tag")
@@ -2191,9 +2191,9 @@ class TestChartApi(ApiOwnersTestCaseMixin,
InsertChartMixin, SupersetTestCase):
new_tag = db.session.query(Tag).filter(Tag.name == "second_tag").one()
# get existing tag and add a new one
- new_tags = [tag.id for tag in chart.tags if tag.type == TagType.custom]
- new_tags.append(new_tag.id)
- update_payload = {"tags": new_tags}
+ new_tags = {tag.id for tag in chart.tags if tag.type == TagType.custom}
+ new_tags.add(new_tag.id)
+ update_payload = {"tags": list(new_tags)}
uri = f"api/v1/chart/{chart.id}"
rv = self.put_assert_metric(uri, update_payload, "put")
@@ -2201,7 +2201,7 @@ class TestChartApi(ApiOwnersTestCaseMixin,
InsertChartMixin, SupersetTestCase):
model = db.session.query(Slice).get(chart.id)
# Clean up system tags
- tag_list = [tag.id for tag in model.tags if tag.type == TagType.custom]
+ tag_list = {tag.id for tag in model.tags if tag.type == TagType.custom}
assert tag_list == new_tags
security_manager.add_permission_role(alpha_role, write_tags_perm)
diff --git a/tests/integration_tests/datasets/api_tests.py
b/tests/integration_tests/datasets/api_tests.py
index 2064239140..9990110bef 100644
--- a/tests/integration_tests/datasets/api_tests.py
+++ b/tests/integration_tests/datasets/api_tests.py
@@ -255,7 +255,7 @@ class TestDatasetApi(SupersetTestCase):
"table_name",
"uuid",
]
- assert sorted(list(response["result"][0].keys())) == expected_columns
# noqa: C414
+ assert sorted(response["result"][0]) == expected_columns
def test_get_dataset_list_gamma(self):
"""
@@ -1563,6 +1563,89 @@ class TestDatasetApi(SupersetTestCase):
db.session.delete(dataset)
db.session.commit()
+ @with_feature_flags(DATASET_FOLDERS=True)
+ def test_update_dataset_add_folders(self):
+ """
+ Dataset API: Test adding folders to dataset
+ """
+ self.login(username="admin")
+
+ dataset = self.insert_default_dataset()
+ dataset_data = {
+ "folders": [
+ {
+ "type": "folder",
+ "uuid": "b49ac3dd-c79b-42a4-9082-39ee74f3b369",
+ "name": "My metrics",
+ "children": [
+ {
+ "type": "metric",
+ "uuid": str(dataset.metrics[0].uuid),
+ },
+ ],
+ },
+ {
+ "type": "folder",
+ "uuid": "f5db85fa-75d6-45e5-bdce-c6194db80642",
+ "name": "My columns",
+ "children": [
+ {
+ "type": "folder",
+ "uuid": "b5330233-e323-4157-b767-98b16f00ca93",
+ "name": "Dimensions",
+ "children": [
+ {
+ "type": "column",
+ "uuid": str(dataset.columns[1].uuid),
+ },
+ ],
+ },
+ ],
+ },
+ ]
+ }
+
+ uri = f"api/v1/dataset/{dataset.id}"
+ rv = self.put_assert_metric(uri, dataset_data, "put")
+ print(rv.data.decode("utf-8"))
+ assert rv.status_code == 200
+
+ model = db.session.query(SqlaTable).get(dataset.id)
+ assert model.folders == [
+ {
+ "uuid": "b49ac3dd-c79b-42a4-9082-39ee74f3b369",
+ "type": "folder",
+ "name": "My metrics",
+ "children": [
+ {
+ "uuid": str(dataset.metrics[0].uuid),
+ "type": "metric",
+ }
+ ],
+ },
+ {
+ "uuid": "f5db85fa-75d6-45e5-bdce-c6194db80642",
+ "type": "folder",
+ "name": "My columns",
+ "children": [
+ {
+ "uuid": "b5330233-e323-4157-b767-98b16f00ca93",
+ "type": "folder",
+ "name": "Dimensions",
+ "children": [
+ {
+ "uuid": str(dataset.columns[1].uuid),
+ "type": "column",
+ }
+ ],
+ }
+ ],
+ },
+ ]
+
+ db.session.delete(dataset)
+ db.session.commit()
+
def test_delete_dataset_item(self):
"""
Dataset API: Test delete dataset item
@@ -1956,7 +2039,7 @@ class TestDatasetApi(SupersetTestCase):
@pytest.mark.usefixtures("create_datasets")
def test_export_dataset_gamma(self):
"""
- Dataset API: Test export dataset has gamma
+ Dataset API: Test export dataset as gamma
"""
dataset = self.get_fixture_datasets()[0]
@@ -1966,7 +2049,7 @@ class TestDatasetApi(SupersetTestCase):
self.login(GAMMA_USERNAME)
rv = self.client.get(uri)
- assert rv.status_code == 403
+ assert rv.status_code in (403, 404)
perm1 = security_manager.find_permission_view_menu("can_export",
"Dataset")
@@ -2016,7 +2099,7 @@ class TestDatasetApi(SupersetTestCase):
self.login(ADMIN_USERNAME)
rv = self.get_assert_metric(uri, "export")
- assert rv.status_code == 404
+ assert rv.status_code in (403, 404)
@pytest.mark.usefixtures("create_datasets")
def test_export_dataset_bundle_gamma(self):
@@ -2032,7 +2115,7 @@ class TestDatasetApi(SupersetTestCase):
self.login(GAMMA_USERNAME)
rv = self.client.get(uri)
# gamma users by default do not have access to this dataset
- assert rv.status_code == 403
+ assert rv.status_code in (403, 404)
@unittest.skip("Number of related objects depend on DB")
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
diff --git a/tests/integration_tests/datasets/commands_tests.py
b/tests/integration_tests/datasets/commands_tests.py
index 2c0a9ce618..fb802bd1bf 100644
--- a/tests/integration_tests/datasets/commands_tests.py
+++ b/tests/integration_tests/datasets/commands_tests.py
@@ -171,6 +171,7 @@ class TestExportDatasetsCommand(SupersetTestCase):
"warning_text": None,
},
],
+ "folders": None,
"normalize_columns": False,
"always_filter_main_dttm": False,
"offset": 0,
@@ -235,6 +236,7 @@ class TestExportDatasetsCommand(SupersetTestCase):
"extra",
"normalize_columns",
"always_filter_main_dttm",
+ "folders",
"uuid",
"metrics",
"columns",
diff --git a/tests/unit_tests/commands/dataset/test_update.py
b/tests/unit_tests/commands/dataset/test_update.py
index 9e99edf204..fa2026b533 100644
--- a/tests/unit_tests/commands/dataset/test_update.py
+++ b/tests/unit_tests/commands/dataset/test_update.py
@@ -14,47 +14,384 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+
+from typing import cast
from unittest.mock import MagicMock
import pytest
+from marshmallow import ValidationError
from pytest_mock import MockerFixture
from superset import db
from superset.commands.dataset.exceptions import DatasetInvalidError
-from superset.commands.dataset.update import UpdateDatasetCommand
+from superset.commands.dataset.update import UpdateDatasetCommand,
validate_folders
from superset.connectors.sqla.models import SqlaTable
+from superset.datasets.schemas import FolderSchema
from superset.models.core import Database
+from tests.unit_tests.conftest import with_feature_flags
@pytest.mark.usefixture("session")
def test_update_uniqueness_error(mocker: MockerFixture) -> None:
+ """
+ Test uniqueness validation in dataset update command.
+ """
SqlaTable.metadata.create_all(db.session.get_bind())
- database = Database(database_name="my_db", sqlalchemy_uri="sqlite://")
- bar = SqlaTable(table_name="bar", schema="foo", database=database)
- baz = SqlaTable(table_name="baz", schema="qux", database=database)
- db.session.add_all([database, bar, baz])
- db.session.commit()
- mock_g = mocker.patch("superset.security.manager.g")
- mock_g.user = MagicMock()
+ # First, make sure session is clean
+ db.session.rollback()
+
+ try:
+ # Set up test data
+ database = Database(database_name="my_db", sqlalchemy_uri="sqlite://")
+ bar = SqlaTable(table_name="bar", schema="foo", database=database)
+ baz = SqlaTable(table_name="baz", schema="qux", database=database)
+ db.session.add_all([database, bar, baz])
+ db.session.commit()
+
+ # Set up mocks
+ mock_g = mocker.patch("superset.security.manager.g")
+ mock_g.user = MagicMock()
+ mocker.patch(
+ "superset.views.base.security_manager.can_access_all_datasources",
+ return_value=True,
+ )
+ mocker.patch(
+
"superset.commands.dataset.update.security_manager.raise_for_ownership",
+ return_value=None,
+ )
+ mocker.patch.object(UpdateDatasetCommand, "compute_owners",
return_value=[])
+
+ # Run the test that should fail
+ with pytest.raises(DatasetInvalidError):
+ UpdateDatasetCommand(
+ bar.id,
+ {
+ "table_name": "baz",
+ "schema": "qux",
+ },
+ ).run()
+ except Exception:
+ db.session.rollback()
+ raise
+ finally:
+ # Clean up - this will run even if the test fails
+ try:
+ db.session.query(SqlaTable).filter(
+ SqlaTable.table_name.in_(["bar", "baz"]),
+ SqlaTable.schema.in_(["foo", "qux"]),
+ ).delete(synchronize_session=False)
+ db.session.query(Database).filter(Database.database_name ==
"my_db").delete(
+ synchronize_session=False
+ )
+ db.session.commit()
+ except Exception:
+ db.session.rollback()
- mocker.patch(
- "superset.views.base.security_manager.can_access_all_datasources",
- return_value=True,
+
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders(mocker: MockerFixture) -> None:
+ """
+ Test the folder validation.
+ """
+ metrics = [mocker.MagicMock(metric_name="metric1", uuid="uuid1")]
+ columns = [
+ mocker.MagicMock(column_name="column1", uuid="uuid2"),
+ mocker.MagicMock(column_name="column2", uuid="uuid3"),
+ ]
+
+ validate_folders(folders=[], metrics=metrics, columns=columns)
+
+ folders = cast(
+ list[FolderSchema],
+ [
+ {
+ "uuid": "uuid4",
+ "type": "folder",
+ "name": "My folder",
+ "children": [
+ {
+ "uuid": "uuid1",
+ "type": "metric",
+ "name": "metric1",
+ },
+ {
+ "uuid": "uuid2",
+ "type": "column",
+ "name": "column1",
+ },
+ {
+ "uuid": "uuid3",
+ "type": "column",
+ "name": "column2",
+ },
+ ],
+ },
+ ],
)
+ validate_folders(folders=folders, metrics=metrics, columns=columns)
+
- mocker.patch(
-
"superset.commands.dataset.update.security_manager.raise_for_ownership",
- return_value=None,
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_cycle(mocker: MockerFixture) -> None:
+ """
+ Test that we can detect cycles in the folder structure.
+ """
+ folders = cast(
+ list[FolderSchema],
+ [
+ {
+ "uuid": "uuid1",
+ "type": "folder",
+ "name": "My folder",
+ "children": [
+ {
+ "uuid": "uuid2",
+ "type": "folder",
+ "name": "My other folder",
+ "children": [
+ {
+ "uuid": "uuid1",
+ "type": "folder",
+ "name": "My folder",
+ "children": [],
+ },
+ ],
+ },
+ ],
+ },
+ ],
)
- mocker.patch.object(UpdateDatasetCommand, "compute_owners",
return_value=[])
+ with pytest.raises(ValidationError) as excinfo:
+ validate_folders(folders=folders, metrics=[], columns=[])
+ assert str(excinfo.value) == "Cycle detected: uuid1 appears in its
ancestry"
+
+
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_inter_cycle(mocker: MockerFixture) -> None:
+ """
+ Test that we can detect cycles between folders.
+ """
+ folders = cast(
+ list[FolderSchema],
+ [
+ {
+ "uuid": "uuid1",
+ "type": "folder",
+ "name": "My folder",
+ "children": [
+ {
+ "uuid": "uuid2",
+ "type": "folder",
+ "name": "My other folder",
+ "children": [],
+ },
+ ],
+ },
+ {
+ "uuid": "uuid2",
+ "type": "folder",
+ "name": "My other folder",
+ "children": [
+ {
+ "uuid": "uuid1",
+ "type": "folder",
+ "name": "My folder",
+ "children": [],
+ },
+ ],
+ },
+ ],
+ )
+
+ with pytest.raises(ValidationError) as excinfo:
+ validate_folders(folders=folders, metrics=[], columns=[])
+ assert str(excinfo.value) == "Duplicate UUID in folder structure: uuid2"
+
+
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_duplicates(mocker: MockerFixture) -> None:
+ """
+ Test that metrics and columns belong to a single folder.
+ """
+ metrics = [mocker.MagicMock(metric_name="count", uuid="uuid2")]
+ folders = cast(
+ list[FolderSchema],
+ [
+ {
+ "uuid": "uuid1",
+ "type": "folder",
+ "name": "My folder",
+ "children": [
+ {
+ "uuid": "uuid2",
+ "type": "metric",
+ "name": "count",
+ },
+ ],
+ },
+ {
+ "uuid": "uuid2",
+ "type": "folder",
+ "name": "My other folder",
+ "children": [
+ {
+ "uuid": "uuid2",
+ "type": "metric",
+ "name": "count",
+ },
+ ],
+ },
+ ],
+ )
+
+ with pytest.raises(ValidationError) as excinfo:
+ validate_folders(folders=folders, metrics=metrics, columns=[])
+ assert str(excinfo.value) == "Duplicate UUID in folder structure: uuid2"
+
- with pytest.raises(DatasetInvalidError):
- UpdateDatasetCommand(
- bar.id,
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_duplicate_name_not_siblings(mocker: MockerFixture)
-> None:
+ """
+ Duplicate folder names are allowed if folders are not siblings.
+ """
+ folders = cast(
+ list[FolderSchema],
+ [
{
- "table_name": "baz",
- "schema": "qux",
+ "uuid": "uuid1",
+ "type": "folder",
+ "name": "Sales",
+ "children": [
+ {
+ "uuid": "uuid2",
+ "type": "folder",
+ "name": "Core",
+ "children": [],
+ },
+ ],
},
- ).run()
+ {
+ "uuid": "uuid3",
+ "type": "folder",
+ "name": "Engineering",
+ "children": [
+ {
+ "uuid": "uuid4",
+ "type": "folder",
+ "name": "Core",
+ "children": [],
+ },
+ ],
+ },
+ ],
+ )
+
+ validate_folders(folders=folders, metrics=[], columns=[])
+
+
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_duplicate_name_siblings(mocker: MockerFixture) ->
None:
+ """
+ Duplicate folder names are not allowed if folders are siblings.
+ """
+ folders = cast(
+ list[FolderSchema],
+ [
+ {
+ "uuid": "uuid1",
+ "type": "folder",
+ "name": "Sales",
+ "children": [
+ {
+ "uuid": "uuid2",
+ "type": "folder",
+ "name": "Core",
+ "children": [],
+ },
+ ],
+ },
+ {
+ "uuid": "uuid3",
+ "type": "folder",
+ "name": "Sales",
+ "children": [
+ {
+ "uuid": "uuid4",
+ "type": "folder",
+ "name": "Other",
+ "children": [],
+ },
+ ],
+ },
+ ],
+ )
+
+ with pytest.raises(ValidationError) as excinfo:
+ validate_folders(folders=folders, metrics=[], columns=[])
+ assert str(excinfo.value) == "Duplicate folder name: Sales"
+
+
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_invalid_names(mocker: MockerFixture) -> None:
+ """
+ Test that we can detect reserved folder names.
+ """
+ folders_with_metrics = cast(
+ list[FolderSchema],
+ [
+ {
+ "uuid": "uuid1",
+ "type": "folder",
+ "name": "Metrics",
+ "children": [],
+ },
+ ],
+ )
+ folders_with_columns = cast(
+ list[FolderSchema],
+ [
+ {
+ "uuid": "uuid1",
+ "type": "folder",
+ "name": "Columns",
+ "children": [],
+ },
+ ],
+ )
+
+ with pytest.raises(ValidationError) as excinfo:
+ validate_folders(folders=folders_with_metrics, metrics=[], columns=[])
+ assert str(excinfo.value) == "Folder cannot have name 'Metrics'"
+
+ with pytest.raises(ValidationError) as excinfo:
+ validate_folders(folders=folders_with_columns, metrics=[], columns=[])
+ assert str(excinfo.value) == "Folder cannot have name 'Columns'"
+
+
+@with_feature_flags(DATASET_FOLDERS=True)
+def test_validate_folders_invalid_uuid(mocker: MockerFixture) -> None:
+ """
+ Test that we can detect invalid UUIDs.
+ """
+ folders = cast(
+ list[FolderSchema],
+ [
+ {
+ "uuid": "uuid4",
+ "type": "folder",
+ "name": "My folder",
+ "children": [
+ {
+ "uuid": "uuid2",
+ "type": "metric",
+ "name": "metric1",
+ },
+ ],
+ },
+ ],
+ )
+
+ with pytest.raises(ValidationError):
+ FolderSchema(many=True).load(folders)
diff --git a/tests/unit_tests/datasets/commands/export_test.py
b/tests/unit_tests/datasets/commands/export_test.py
index f42bbfc9cb..04bc989ba5 100644
--- a/tests/unit_tests/datasets/commands/export_test.py
+++ b/tests/unit_tests/datasets/commands/export_test.py
@@ -16,6 +16,8 @@
# under the License.
# pylint: disable=import-outside-toplevel, unused-argument, unused-import
+from uuid import UUID
+
from sqlalchemy.orm.session import Session
from superset import db
@@ -47,6 +49,7 @@ def test_export(session: Session) -> None:
type="INTEGER",
expression="revenue-expenses",
extra=json.dumps({"certified_by": "User"}),
+ uuid=UUID("00000000-0000-0000-0000-000000000005"),
),
]
metrics = [
@@ -54,6 +57,7 @@ def test_export(session: Session) -> None:
metric_name="cnt",
expression="COUNT(*)",
extra=json.dumps({"warning_markdown": None}),
+ uuid=UUID("00000000-0000-0000-0000-000000000004"),
),
]
@@ -61,6 +65,46 @@ def test_export(session: Session) -> None:
table_name="my_table",
columns=columns,
metrics=metrics,
+ folders=[
+ {
+ "uuid": "00000000-0000-0000-0000-000000000000",
+ "type": "folder",
+ "name": "Engineering",
+ "children": [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000001",
+ "type": "folder",
+ "name": "Core",
+ "children": [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000004",
+ "type": "metric",
+ "name": "cnt",
+ },
+ ],
+ },
+ ],
+ },
+ {
+ "uuid": "00000000-0000-0000-0000-000000000002",
+ "type": "folder",
+ "name": "Sales",
+ "children": [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000003",
+ "type": "folder",
+ "name": "Core",
+ "children": [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000005",
+ "type": "column",
+ "name": "profit",
+ },
+ ],
+ },
+ ],
+ },
+ ],
main_dttm_col="ds",
database=database,
offset=-8,
@@ -126,6 +170,29 @@ extra:
warning_markdown: '*WARNING*'
normalize_columns: false
always_filter_main_dttm: false
+folders:
+- uuid: 00000000-0000-0000-0000-000000000000
+ type: folder
+ name: Engineering
+ children:
+ - uuid: 00000000-0000-0000-0000-000000000001
+ type: folder
+ name: Core
+ children:
+ - uuid: 00000000-0000-0000-0000-000000000004
+ type: metric
+ name: cnt
+- uuid: 00000000-0000-0000-0000-000000000002
+ type: folder
+ name: Sales
+ children:
+ - uuid: 00000000-0000-0000-0000-000000000003
+ type: folder
+ name: Core
+ children:
+ - uuid: 00000000-0000-0000-0000-000000000005
+ type: column
+ name: profit
uuid: {payload["uuid"]}
metrics:
- metric_name: cnt
diff --git a/tests/unit_tests/datasets/commands/importers/v1/import_test.py
b/tests/unit_tests/datasets/commands/importers/v1/import_test.py
index 6bba6f039d..2d8c0ede4d 100644
--- a/tests/unit_tests/datasets/commands/importers/v1/import_test.py
+++ b/tests/unit_tests/datasets/commands/importers/v1/import_test.py
@@ -89,6 +89,7 @@ def test_import_dataset(mocker: MockerFixture, session:
Session) -> None:
"d3format": None,
"extra": {"warning_markdown": None},
"warning_text": None,
+ "uuid": "00000000-0000-0000-0000-000000000001",
}
],
"columns": [
@@ -106,8 +107,49 @@ def test_import_dataset(mocker: MockerFixture, session:
Session) -> None:
"extra": {
"certified_by": "User",
},
+ "uuid": "00000000-0000-0000-0000-000000000002",
}
],
+ "folders": [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000000",
+ "type": "folder",
+ "name": "Engineering",
+ "children": [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000001",
+ "type": "folder",
+ "name": "Core",
+ "children": [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000004",
+ "type": "metric",
+ "name": "cnt",
+ },
+ ],
+ },
+ ],
+ },
+ {
+ "uuid": "00000000-0000-0000-0000-000000000002",
+ "type": "folder",
+ "name": "Sales",
+ "children": [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000003",
+ "type": "folder",
+ "name": "Core",
+ "children": [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000005",
+ "type": "column",
+ "name": "profit",
+ },
+ ],
+ },
+ ],
+ },
+ ],
"database_uuid": database.uuid,
"database_id": database.id,
}
@@ -139,6 +181,9 @@ def test_import_dataset(mocker: MockerFixture, session:
Session) -> None:
assert sqla_table.metrics[0].d3format is None
assert sqla_table.metrics[0].extra == '{"warning_markdown": null}'
assert sqla_table.metrics[0].warning_text is None
+ assert sqla_table.metrics[0].uuid == uuid.UUID(
+ "00000000-0000-0000-0000-000000000001"
+ )
assert len(sqla_table.columns) == 1
assert sqla_table.columns[0].column_name == "profit"
assert sqla_table.columns[0].verbose_name is None
@@ -151,10 +196,131 @@ def test_import_dataset(mocker: MockerFixture, session:
Session) -> None:
assert sqla_table.columns[0].description is None
assert sqla_table.columns[0].python_date_format is None
assert sqla_table.columns[0].extra == '{"certified_by": "User"}'
+ assert sqla_table.columns[0].uuid == uuid.UUID(
+ "00000000-0000-0000-0000-000000000002"
+ )
+ assert sqla_table.folders == [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000000",
+ "type": "folder",
+ "name": "Engineering",
+ "children": [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000001",
+ "type": "folder",
+ "name": "Core",
+ "children": [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000004",
+ "type": "metric",
+ "name": "cnt",
+ },
+ ],
+ },
+ ],
+ },
+ {
+ "uuid": "00000000-0000-0000-0000-000000000002",
+ "type": "folder",
+ "name": "Sales",
+ "children": [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000003",
+ "type": "folder",
+ "name": "Core",
+ "children": [
+ {
+ "uuid": "00000000-0000-0000-0000-000000000005",
+ "type": "column",
+ "name": "profit",
+ },
+ ],
+ },
+ ],
+ },
+ ]
assert sqla_table.database.uuid == database.uuid
assert sqla_table.database.id == database.id
+def test_import_dataset_no_folder(mocker: MockerFixture, session: Session) ->
None:
+ """
+ Test importing a dataset that was exported without folders.
+ """
+ from superset import security_manager
+ from superset.commands.dataset.importers.v1.utils import import_dataset
+ from superset.connectors.sqla.models import SqlaTable
+ from superset.models.core import Database
+
+ mocker.patch.object(security_manager, "can_access", return_value=True)
+
+ engine = db.session.get_bind()
+ SqlaTable.metadata.create_all(engine) # pylint: disable=no-member
+
+ database = Database(database_name="my_database",
sqlalchemy_uri="sqlite://")
+ db.session.add(database)
+ db.session.flush()
+
+ dataset_uuid = uuid.uuid4()
+ config = {
+ "table_name": "my_table",
+ "main_dttm_col": "ds",
+ "description": "This is the description",
+ "default_endpoint": None,
+ "offset": -8,
+ "cache_timeout": 3600,
+ "catalog": "public",
+ "schema": "my_schema",
+ "sql": None,
+ "params": {
+ "remote_id": 64,
+ "database_name": "examples",
+ "import_time": 1606677834,
+ },
+ "template_params": {
+ "answer": "42",
+ },
+ "filter_select_enabled": True,
+ "fetch_values_predicate": "foo IN (1, 2)",
+ "extra": {"warning_markdown": "*WARNING*"},
+ "uuid": dataset_uuid,
+ "metrics": [
+ {
+ "metric_name": "cnt",
+ "verbose_name": None,
+ "metric_type": None,
+ "expression": "COUNT(*)",
+ "description": None,
+ "d3format": None,
+ "extra": {"warning_markdown": None},
+ "warning_text": None,
+ }
+ ],
+ "columns": [
+ {
+ "column_name": "profit",
+ "verbose_name": None,
+ "is_dttm": None,
+ "is_active": None,
+ "type": "INTEGER",
+ "groupby": None,
+ "filterable": None,
+ "expression": "revenue-expenses",
+ "description": None,
+ "python_date_format": None,
+ "extra": {
+ "certified_by": "User",
+ },
+ }
+ ],
+ "database_uuid": database.uuid,
+ "database_id": database.id,
+ }
+
+ sqla_table = import_dataset(config)
+ assert sqla_table.folders is None
+
+
def test_import_dataset_duplicate_column(
mocker: MockerFixture, session: Session
) -> None: