Copilot commented on code in PR #37120:
URL: https://github.com/apache/superset/pull/37120#discussion_r2713838107
##########
superset/commands/query/export.py:
##########
@@ -79,21 +83,23 @@ def _export(
        database_slug = secure_filename(model.database.database_name)
        file_name = f"databases/{database_slug}.yaml"
-        payload = model.database.export_to_dict(
-            recursive=False,
-            include_parent_ref=False,
-            include_defaults=True,
-            export_uuids=True,
-        )
-        # TODO (betodealmeida): move this logic to export_to_dict once this
-        # becomes the default export endpoint
-        if "extra" in payload:
-            try:
-                payload["extra"] = json.loads(payload["extra"])
-            except json.JSONDecodeError:
-                logger.info("Unable to decode `extra` field: %s", payload["extra"])
+        # Only yield if not already seen (similar to dataset export)
+        if file_name not in seen:
+            payload = model.database.export_to_dict(
+                recursive=False,
+                include_parent_ref=False,
+                include_defaults=True,
+                export_uuids=True,
+            )
+            # TODO (betodealmeida): move this logic to export_to_dict once this
+            # becomes the default export endpoint
+            if "extra" in payload:
+                try:
+                    payload["extra"] = json.loads(payload["extra"])
+                except json.JSONDecodeError:
+                    logger.info("Unable to decode `extra` field: %s", payload["extra"])
-        payload["version"] = EXPORT_VERSION
+            payload["version"] = EXPORT_VERSION
-        file_content = yaml.safe_dump(payload, sort_keys=False)
-        yield file_name, lambda: file_content
+            file_content = yaml.safe_dump(payload, sort_keys=False)
+            yield file_name, lambda: file_content
Review Comment:
The lambda function may capture the wrong value of `file_content` due to
late binding in Python closures. If multiple database files are yielded in a
loop, all lambdas would reference the final value of `file_content` from the
last iteration. Consider using a default argument to capture the current value,
similar to: `lambda fc=file_content: fc`
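
For reference, a minimal standalone sketch of the pitfall being described (generic Python, not the PR's code): each lambda in the first list looks up the loop variable when it is finally called, after the loop has ended, while the default argument in the second list freezes the value at creation time.

```python
# Late binding: all three lambdas share the comprehension's `content`
# variable, which is "c" by the time any of them is called.
callbacks = [lambda: content for content in ("a", "b", "c")]
print([cb() for cb in callbacks])  # ['c', 'c', 'c']

# Default-argument fix: `fc` is bound once per iteration, so each
# callback keeps the value it was created with.
callbacks = [lambda fc=content: fc for content in ("a", "b", "c")]
print([cb() for cb in callbacks])  # ['a', 'b', 'c']
```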
##########
tests/integration_tests/dashboards/commands_tests.py:
##########
@@ -507,6 +507,93 @@ def test_export_dashboard_command_no_related(self, mock_g1, mock_g2):
        }
        assert expected_paths == set(contents.keys())
+    @pytest.mark.usefixtures("load_world_bank_dashboard_with_slices")
+    @patch("superset.security.manager.g")
+    @patch("superset.views.base.g")
+    def test_export_dashboard_cross_database_charts(self, mock_g1, mock_g2):
+        """
+        Test that dashboards with charts from multiple databases export correctly.
+        This reproduces issue #37113 where charts from different databases were missing.
+        """
+        mock_g1.user = security_manager.find_user("admin")
+        mock_g2.user = security_manager.find_user("admin")
+
+        # Create a second database for testing
+        second_db = Database(database_name="test_db_2", sqlalchemy_uri="sqlite://")
+        db.session.add(second_db)
+
+        # Create a dataset in the second database
+        second_dataset = SqlaTable(
+            table_name="second_dataset",
+            database=second_db,
+            database_id=second_db.id,
+            columns=[],
+        )
+        db.session.add(second_dataset)
+
+        # Create a chart using the second database's dataset
+        chart_from_second_db = Slice(
+            slice_name="Chart from Second Database",
+            datasource_type="table",
+            datasource_id=second_dataset.id,
+            datasource_name=second_dataset.table_name,
+            viz_type="bar",
+            params=json.dumps({"viz_type": "bar"}),
+        )
+        db.session.add(chart_from_second_db)
+
+        # Get the example dashboard and add the new chart
+        example_dashboard = (
+            db.session.query(Dashboard).filter_by(slug="world_health").one()
+        )
+
+        # Store original charts count
+        original_charts_count = len(example_dashboard.slices)
+
+        # Add the new chart from different database to the dashboard
+        example_dashboard.slices.append(chart_from_second_db)
+        db.session.commit()
+
+        # Export the dashboard
+        command = ExportDashboardsCommand([example_dashboard.id])
+        contents = dict(command.run())
+
+        # Verify all databases are exported
+        db_files = [key for key in contents.keys() if key.startswith("databases/")]
+        assert len(db_files) >= 2, f"Expected at least 2 database files, got {db_files}"
+
+        # Verify the second database is included
+        assert "databases/test_db_2.yaml" in contents.keys(), \
+            f"Second database not found in export. Keys: {list(contents.keys())}"
+
+        # Verify all charts are exported (original + new one)
+        chart_files = [key for key in contents.keys() if key.startswith("charts/")]
+        assert len(chart_files) == original_charts_count + 1, \
+            f"Expected {original_charts_count + 1} charts, got {len(chart_files)}"
+
+        # Verify the new chart from second database is included
+        chart_from_second_db_file = None
+        for key in chart_files:
+            if f"Chart_from_Second_Database_{chart_from_second_db.id}" in key:
+                chart_from_second_db_file = key
+                break
+
+        assert chart_from_second_db_file is not None, \
+            f"Chart from second database not found in export. Chart files: {chart_files}"
Review Comment:
The detailed assertion message is built inline with an f-string split across a backslash continuation, which is hard to read and easy to break. Note that `chart_from_second_db_file` is always defined at this point (it is simply `None` when the chart is not found), so the assertion itself works; the concern is readability. Consider constructing the detailed error message before the assertion.
```suggestion
        error_msg = (
            "Chart from second database not found in export. "
            f"Chart files: {chart_files}"
        )
        assert chart_from_second_db_file is not None, error_msg
```
##########
superset/commands/export/models.py:
##########
@@ -51,23 +51,34 @@ def _file_content(model: Model) -> str:
    @staticmethod
    def _export(
-        model: Model, export_related: bool = True
+        model: Model, export_related: bool = True, seen: set[str] | None = None
    ) -> Iterator[tuple[str, Callable[[], str]]]:
        raise NotImplementedError("Subclasses MUST implement _export")

-    def run(self) -> Iterator[tuple[str, Callable[[], str]]]:
+    def run(self, seen: set[str] | None = None) -> Iterator[tuple[str, Callable[[], str]]]:
        self.validate()
-        metadata = {
-            "version": EXPORT_VERSION,
-            "type": self.dao.model_cls.__name__,  # type: ignore
-            "timestamp": datetime.now(tz=timezone.utc).isoformat(),
-        }
-        yield METADATA_FILE_NAME, lambda: yaml.safe_dump(metadata, sort_keys=False)
+        # Use provided seen set or create new one
+        if seen is None:
+            seen = set()
+            should_add_metadata = True
+        else:
+            # If seen set is provided, we're being called from another command
+            should_add_metadata = False
+
+        # Only add metadata if this is the root command
+        if should_add_metadata:
+            metadata = {
+                "version": EXPORT_VERSION,
+                "type": self.dao.model_cls.__name__,  # type: ignore
+                "timestamp": datetime.now(tz=timezone.utc).isoformat(),
+            }
+            if METADATA_FILE_NAME not in seen:
+                yield METADATA_FILE_NAME, lambda: yaml.safe_dump(metadata, sort_keys=False)
+                seen.add(METADATA_FILE_NAME)
Review Comment:
The logic for determining when to add metadata may cause issues in nested
export scenarios. When a non-root command creates its own `seen` set (when
`seen is None`), it will set `should_add_metadata = True` and could potentially
yield duplicate metadata files. Consider checking if METADATA_FILE_NAME is
already in the `seen` set before deciding whether to add metadata, rather than
relying solely on whether `seen` was passed in.
```suggestion
        # Only add metadata if it hasn't been added yet in this export graph
        should_add_metadata = METADATA_FILE_NAME not in seen
        if should_add_metadata:
            metadata = {
                "version": EXPORT_VERSION,
                "type": self.dao.model_cls.__name__,  # type: ignore
                "timestamp": datetime.now(tz=timezone.utc).isoformat(),
            }
            yield METADATA_FILE_NAME, lambda: yaml.safe_dump(metadata, sort_keys=False)
            seen.add(METADATA_FILE_NAME)
```
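
A minimal sketch of the behaviour this suggestion aims for, under the assumption that nested commands receive the caller's `seen` set; the `run` below is a simplified stand-in for `ExportModelsCommand.run`, not the actual Superset class:

```python
from collections.abc import Iterator

METADATA_FILE_NAME = "metadata.yaml"

# Simplified stand-in: whichever command runs first in the export graph
# yields metadata.yaml; later commands sharing the same `seen` set skip
# it, regardless of whether `seen` was created locally or passed in.
def run(model_type: str, seen: set[str] | None = None) -> Iterator[tuple[str, str]]:
    if seen is None:
        seen = set()
    if METADATA_FILE_NAME not in seen:
        seen.add(METADATA_FILE_NAME)
        yield METADATA_FILE_NAME, f"type: {model_type}"
    yield f"{model_type}s/example.yaml", f"name: {model_type}"

shared: set[str] = set()
files = list(run("dashboard", shared)) + list(run("chart", shared))
print([name for name, _ in files])
# ['metadata.yaml', 'dashboards/example.yaml', 'charts/example.yaml']
```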
##########
superset/commands/theme/export.py:
##########
@@ -67,8 +67,12 @@ def _file_content(model: Theme) -> str:
    @staticmethod
    def _export(
-        model: Theme, export_related: bool = True
+        model: Theme, export_related: bool = True, seen: set[str] | None = None
    ) -> Iterator[tuple[str, Callable[[], str]]]:
+        # Initialize seen set if not provided (for consistency)
+        if seen is None:
+            seen = set()
+
Review Comment:
Variable seen is not used.
```suggestion
```
##########
tests/integration_tests/dashboards/commands_tests.py:
##########
@@ -507,6 +507,93 @@ def test_export_dashboard_command_no_related(self, mock_g1, mock_g2):
        }
        assert expected_paths == set(contents.keys())
+    @pytest.mark.usefixtures("load_world_bank_dashboard_with_slices")
+    @patch("superset.security.manager.g")
+    @patch("superset.views.base.g")
+    def test_export_dashboard_cross_database_charts(self, mock_g1, mock_g2):
+        """
+        Test that dashboards with charts from multiple databases export correctly.
+        This reproduces issue #37113 where charts from different databases were missing.
+        """
+        mock_g1.user = security_manager.find_user("admin")
+        mock_g2.user = security_manager.find_user("admin")
+
+        # Create a second database for testing
+        second_db = Database(database_name="test_db_2", sqlalchemy_uri="sqlite://")
+        db.session.add(second_db)
+
+        # Create a dataset in the second database
+        second_dataset = SqlaTable(
+            table_name="second_dataset",
+            database=second_db,
+            database_id=second_db.id,
+            columns=[],
+        )
+        db.session.add(second_dataset)
+
+        # Create a chart using the second database's dataset
+        chart_from_second_db = Slice(
+            slice_name="Chart from Second Database",
+            datasource_type="table",
+            datasource_id=second_dataset.id,
+            datasource_name=second_dataset.table_name,
+            viz_type="bar",
+            params=json.dumps({"viz_type": "bar"}),
+        )
+        db.session.add(chart_from_second_db)
+
+        # Get the example dashboard and add the new chart
+        example_dashboard = (
+            db.session.query(Dashboard).filter_by(slug="world_health").one()
+        )
+
+        # Store original charts count
+        original_charts_count = len(example_dashboard.slices)
+
+        # Add the new chart from different database to the dashboard
+        example_dashboard.slices.append(chart_from_second_db)
+        db.session.commit()
+
+        # Export the dashboard
+        command = ExportDashboardsCommand([example_dashboard.id])
+        contents = dict(command.run())
+
+        # Verify all databases are exported
+        db_files = [key for key in contents.keys() if key.startswith("databases/")]
+        assert len(db_files) >= 2, f"Expected at least 2 database files, got {db_files}"
+
+        # Verify the second database is included
+        assert "databases/test_db_2.yaml" in contents.keys(), \
+            f"Second database not found in export. Keys: {list(contents.keys())}"
+
+        # Verify all charts are exported (original + new one)
+        chart_files = [key for key in contents.keys() if key.startswith("charts/")]
+        assert len(chart_files) == original_charts_count + 1, \
+            f"Expected {original_charts_count + 1} charts, got {len(chart_files)}"
+
+        # Verify the new chart from second database is included
+        chart_from_second_db_file = None
+        for key in chart_files:
+            if f"Chart_from_Second_Database_{chart_from_second_db.id}" in key:
+                chart_from_second_db_file = key
+                break
+
+        assert chart_from_second_db_file is not None, \
+            f"Chart from second database not found in export. Chart files: {chart_files}"
+
+        # Verify the dataset from second database is included
+        dataset_files = [key for key in contents.keys() if key.startswith("datasets/")]
+        second_dataset_file = f"datasets/test_db_2/second_dataset_{second_dataset.id}.yaml"
+        assert second_dataset_file in contents.keys(), \
+            f"Second dataset not found. Dataset files: {dataset_files}"
+
+        # Clean up
+        example_dashboard.slices.remove(chart_from_second_db)
+        db.session.delete(chart_from_second_db)
+        db.session.delete(second_dataset)
+        db.session.delete(second_db)
+        db.session.commit()
Review Comment:
The test cleanup is not exception-safe. If an assertion fails before
reaching the cleanup section, the test database objects (second_db,
second_dataset, chart_from_second_db) will remain in the database, potentially
affecting other tests. Consider using a try-finally block or pytest fixtures
with yield to ensure cleanup happens even when assertions fail.
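
As a sketch of the fixture-based alternative, assuming the test module's existing imports (`pytest`, `db`, `Database`, `SqlaTable`); the fixture name `second_database_objects` is illustrative, not part of the PR:

```python
import pytest

@pytest.fixture
def second_database_objects():
    # Create the cross-database objects up front (assumes the module's
    # existing `db`, `Database`, and `SqlaTable` imports).
    second_db = Database(database_name="test_db_2", sqlalchemy_uri="sqlite://")
    db.session.add(second_db)
    second_dataset = SqlaTable(
        table_name="second_dataset",
        database=second_db,
        columns=[],
    )
    db.session.add(second_dataset)
    db.session.commit()
    # Hand the objects to the test; code after `yield` runs as teardown,
    # even when an assertion in the test body fails.
    yield second_db, second_dataset
    db.session.delete(second_dataset)
    db.session.delete(second_db)
    db.session.commit()
```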
##########
superset/commands/database/export.py:
##########
@@ -104,8 +104,12 @@ def _file_content(model: Database) -> str:
    @staticmethod
    def _export(
-        model: Database, export_related: bool = True
+        model: Database, export_related: bool = True, seen: set[str] | None = None
    ) -> Iterator[tuple[str, Callable[[], str]]]:
+        # Initialize seen set if not provided
+        if seen is None:
+            seen = set()
Review Comment:
Variable seen is not used.
```suggestion
            seen = set()
        # Mark `seen` as intentionally used to satisfy static analysis
        _ = seen
```
##########
superset/commands/dataset/export.py:
##########
@@ -103,29 +107,33 @@ def _export(
            )
        file_path = f"databases/{db_file_name}.yaml"
-        payload = model.database.export_to_dict(
-            recursive=False,
-            include_parent_ref=False,
-            include_defaults=True,
-            export_uuids=True,
-        )
-        # TODO (betodealmeida): move this logic to export_to_dict once this
-        # becomes the default export endpoint
-        if payload.get("extra"):
-            try:
-                payload["extra"] = json.loads(payload["extra"])
-            except json.JSONDecodeError:
-                logger.info("Unable to decode `extra` field: %s", payload["extra"])
-
-        if ssh_tunnel := model.database.ssh_tunnel:
-            ssh_tunnel_payload = ssh_tunnel.export_to_dict(
+        # Only yield the database file if not already seen
+        # This is critical to fix the issue where databases were being duplicated
+        # and potentially overwritten when charts from different databases were exported
+        if file_path not in seen:
+            payload = model.database.export_to_dict(
                recursive=False,
                include_parent_ref=False,
                include_defaults=True,
-                export_uuids=False,
+                export_uuids=True,
            )
-            payload["ssh_tunnel"] = mask_password_info(ssh_tunnel_payload)
+            # TODO (betodealmeida): move this logic to export_to_dict once this
+            # becomes the default export endpoint
+            if payload.get("extra"):
+                try:
+                    payload["extra"] = json.loads(payload["extra"])
+                except json.JSONDecodeError:
+                    logger.info("Unable to decode `extra` field: %s", payload["extra"])
+
+            if ssh_tunnel := model.database.ssh_tunnel:
+                ssh_tunnel_payload = ssh_tunnel.export_to_dict(
+                    recursive=False,
+                    include_parent_ref=False,
+                    include_defaults=True,
+                    export_uuids=False,
+                )
+                payload["ssh_tunnel"] = mask_password_info(ssh_tunnel_payload)
-        payload["version"] = EXPORT_VERSION
+            payload["version"] = EXPORT_VERSION
-        yield file_path, lambda: yaml.safe_dump(payload, sort_keys=False)
+            yield file_path, lambda: yaml.safe_dump(payload, sort_keys=False)
Review Comment:
The lambda function may capture the wrong value of `payload` due to late
binding in Python closures. If multiple database files are yielded in a loop,
all lambdas would reference the final value of `payload` from the last
iteration. Consider using a default argument to capture the current value,
similar to: `lambda p=payload: yaml.safe_dump(p, sort_keys=False)`
```suggestion
            yield file_path, lambda p=payload: yaml.safe_dump(p, sort_keys=False)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]