Copilot commented on code in PR #37120:
URL: https://github.com/apache/superset/pull/37120#discussion_r2713838107


##########
superset/commands/query/export.py:
##########
@@ -79,21 +83,23 @@ def _export(
             database_slug = secure_filename(model.database.database_name)
             file_name = f"databases/{database_slug}.yaml"
 
-            payload = model.database.export_to_dict(
-                recursive=False,
-                include_parent_ref=False,
-                include_defaults=True,
-                export_uuids=True,
-            )
-            # TODO (betodealmeida): move this logic to export_to_dict once this
-            # becomes the default export endpoint
-            if "extra" in payload:
-                try:
-                    payload["extra"] = json.loads(payload["extra"])
-                except json.JSONDecodeError:
-                    logger.info("Unable to decode `extra` field: %s", payload["extra"])
+            # Only yield if not already seen (similar to dataset export)
+            if file_name not in seen:
+                payload = model.database.export_to_dict(
+                    recursive=False,
+                    include_parent_ref=False,
+                    include_defaults=True,
+                    export_uuids=True,
+                )
+                # TODO (betodealmeida): move this logic to export_to_dict once this
+                # becomes the default export endpoint
+                if "extra" in payload:
+                    try:
+                        payload["extra"] = json.loads(payload["extra"])
+                    except json.JSONDecodeError:
+                        logger.info("Unable to decode `extra` field: %s", payload["extra"])
 
-            payload["version"] = EXPORT_VERSION
+                payload["version"] = EXPORT_VERSION
 
-            file_content = yaml.safe_dump(payload, sort_keys=False)
-            yield file_name, lambda: file_content
+                file_content = yaml.safe_dump(payload, sort_keys=False)
+                yield file_name, lambda: file_content

Review Comment:
   The lambda captures the variable `file_content`, not its current value, because Python closures bind late. If the consumer collects several (file name, callable) pairs before invoking any of them, every callable returns the `file_content` from the last loop iteration. Bind the value at definition time with a default argument: `lambda fc=file_content: fc`
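   A minimal standalone sketch (not Superset code) of the failure mode and the fix, for consumers that collect all (name, callable) pairs before invoking any of them, which is how bundle export is consumed:
   ```python
   def export_files():
       """Yields (name, callable) pairs that are invoked later."""
       for name, content in [("a.yaml", "a: 1"), ("b.yaml", "b: 2")]:
           file_content = content
           yield name, lambda: file_content          # late binding: shared cell
           # yield name, lambda fc=file_content: fc  # fix: bind the value now

   pairs = list(export_files())      # generator fully consumed first
   print([fn() for _, fn in pairs])  # buggy version prints ['b: 2', 'b: 2']
   ```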



##########
tests/integration_tests/dashboards/commands_tests.py:
##########
@@ -507,6 +507,93 @@ def test_export_dashboard_command_no_related(self, mock_g1, mock_g2):
         }
         assert expected_paths == set(contents.keys())
 
+    @pytest.mark.usefixtures("load_world_bank_dashboard_with_slices")
+    @patch("superset.security.manager.g")
+    @patch("superset.views.base.g")
+    def test_export_dashboard_cross_database_charts(self, mock_g1, mock_g2):
+        """
+        Test that dashboards with charts from multiple databases export correctly.
+        This reproduces issue #37113 where charts from different databases were missing.
+        """
+        mock_g1.user = security_manager.find_user("admin")
+        mock_g2.user = security_manager.find_user("admin")
+
+        # Create a second database for testing
+        second_db = Database(database_name="test_db_2", sqlalchemy_uri="sqlite://")
+        db.session.add(second_db)
+
+        # Create a dataset in the second database
+        second_dataset = SqlaTable(
+            table_name="second_dataset",
+            database=second_db,
+            database_id=second_db.id,
+            columns=[],
+        )
+        db.session.add(second_dataset)
+
+        # Create a chart using the second database's dataset
+        chart_from_second_db = Slice(
+            slice_name="Chart from Second Database",
+            datasource_type="table",
+            datasource_id=second_dataset.id,
+            datasource_name=second_dataset.table_name,
+            viz_type="bar",
+            params=json.dumps({"viz_type": "bar"}),
+        )
+        db.session.add(chart_from_second_db)
+
+        # Get the example dashboard and add the new chart
+        example_dashboard = (
+            db.session.query(Dashboard).filter_by(slug="world_health").one()
+        )
+
+        # Store original charts count
+        original_charts_count = len(example_dashboard.slices)
+
+        # Add the new chart from different database to the dashboard
+        example_dashboard.slices.append(chart_from_second_db)
+        db.session.commit()
+
+        # Export the dashboard
+        command = ExportDashboardsCommand([example_dashboard.id])
+        contents = dict(command.run())
+
+        # Verify all databases are exported
+        db_files = [key for key in contents.keys() if key.startswith("databases/")]
+        assert len(db_files) >= 2, f"Expected at least 2 database files, got {db_files}"
+
+        # Verify the second database is included
+        assert "databases/test_db_2.yaml" in contents.keys(), \
+            f"Second database not found in export. Keys: {list(contents.keys())}"
+
+        # Verify all charts are exported (original + new one)
+        chart_files = [key for key in contents.keys() if key.startswith("charts/")]
+        assert len(chart_files) == original_charts_count + 1, \
+            f"Expected {original_charts_count + 1} charts, got {len(chart_files)}"
+
+        # Verify the new chart from second database is included
+        chart_from_second_db_file = None
+        for key in chart_files:
+            if f"Chart_from_Second_Database_{chart_from_second_db.id}" in key:
+                chart_from_second_db_file = key
+                break
+
+        assert chart_from_second_db_file is not None, \
+            f"Chart from second database not found in export. Chart files: {chart_files}"

Review Comment:
   The assertion's failure message is a backslash-continued f-string, which is hard to read and easy to break when extended. (There is no runtime hazard here: the message only references `chart_files`, which always exists, and Python evaluates the message only when the assertion fails.) Building the detailed error message before the assertion keeps the assert itself on one line:
   ```suggestion
           error_msg = (
               "Chart from second database not found in export. "
               f"Chart files: {chart_files}"
           )
           assert chart_from_second_db_file is not None, error_msg
   ```



##########
superset/commands/export/models.py:
##########
@@ -51,23 +51,34 @@ def _file_content(model: Model) -> str:
 
     @staticmethod
     def _export(
-        model: Model, export_related: bool = True
+        model: Model, export_related: bool = True, seen: set[str] | None = None
     ) -> Iterator[tuple[str, Callable[[], str]]]:
         raise NotImplementedError("Subclasses MUST implement _export")
 
-    def run(self) -> Iterator[tuple[str, Callable[[], str]]]:
+    def run(self, seen: set[str] | None = None) -> Iterator[tuple[str, Callable[[], str]]]:
         self.validate()
 
-        metadata = {
-            "version": EXPORT_VERSION,
-            "type": self.dao.model_cls.__name__,  # type: ignore
-            "timestamp": datetime.now(tz=timezone.utc).isoformat(),
-        }
-        yield METADATA_FILE_NAME, lambda: yaml.safe_dump(metadata, sort_keys=False)
+        # Use provided seen set or create new one
+        if seen is None:
+            seen = set()
+            should_add_metadata = True
+        else:
+            # If seen set is provided, we're being called from another command
+            should_add_metadata = False
+
+        # Only add metadata if this is the root command
+        if should_add_metadata:
+            metadata = {
+                "version": EXPORT_VERSION,
+                "type": self.dao.model_cls.__name__,  # type: ignore
+                "timestamp": datetime.now(tz=timezone.utc).isoformat(),
+            }
+            if METADATA_FILE_NAME not in seen:
+                yield METADATA_FILE_NAME, lambda: yaml.safe_dump(metadata, sort_keys=False)
+                seen.add(METADATA_FILE_NAME)

Review Comment:
   Deciding whether to add metadata based solely on whether `seen` was passed in is fragile in nested export scenarios: any nested command invoked with `seen=None` creates its own set, concludes it is the root, and yields a duplicate metadata file. Checking whether `METADATA_FILE_NAME` is already in `seen` is more robust:
   ```suggestion
   
           # Only add metadata if it hasn't been added yet in this export graph
           should_add_metadata = METADATA_FILE_NAME not in seen
   
           if should_add_metadata:
               metadata = {
                   "version": EXPORT_VERSION,
                   "type": self.dao.model_cls.__name__,  # type: ignore
                   "timestamp": datetime.now(tz=timezone.utc).isoformat(),
               }
               yield METADATA_FILE_NAME, lambda: yaml.safe_dump(metadata, sort_keys=False)
               seen.add(METADATA_FILE_NAME)
   ```
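   A rough standalone sketch of the pattern, with a hypothetical `run_export` standing in for `ExportModelsCommand.run()`: sibling commands share one `seen` set, and membership in the set, not whether the set was passed in, decides who writes metadata:
   ```python
   METADATA_FILE_NAME = "metadata.yaml"

   def run_export(name: str, seen: set[str] | None = None):
       """Hypothetical stand-in for ExportModelsCommand.run()."""
       if seen is None:
           seen = set()
       # The membership check keeps nested and sibling calls idempotent.
       if METADATA_FILE_NAME not in seen:
           seen.add(METADATA_FILE_NAME)
           yield METADATA_FILE_NAME
       yield f"{name}s/example.yaml"

   shared: set[str] = set()
   files = [f for cmd in ("dashboard", "chart") for f in run_export(cmd, shared)]
   print(files)  # ['metadata.yaml', 'dashboards/example.yaml', 'charts/example.yaml']
   ```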



##########
superset/commands/theme/export.py:
##########
@@ -67,8 +67,12 @@ def _file_content(model: Theme) -> str:
 
     @staticmethod
     def _export(
-        model: Theme, export_related: bool = True
+        model: Theme, export_related: bool = True, seen: set[str] | None = None
     ) -> Iterator[tuple[str, Callable[[], str]]]:
+        # Initialize seen set if not provided (for consistency)
+        if seen is None:
+            seen = set()
+

Review Comment:
   The variable `seen` is initialized here but never used in this method; remove the dead assignment:
   ```suggestion
   
   ```



##########
tests/integration_tests/dashboards/commands_tests.py:
##########
@@ -507,6 +507,93 @@ def test_export_dashboard_command_no_related(self, mock_g1, mock_g2):
         }
         assert expected_paths == set(contents.keys())
 
+    @pytest.mark.usefixtures("load_world_bank_dashboard_with_slices")
+    @patch("superset.security.manager.g")
+    @patch("superset.views.base.g")
+    def test_export_dashboard_cross_database_charts(self, mock_g1, mock_g2):
+        """
+        Test that dashboards with charts from multiple databases export correctly.
+        This reproduces issue #37113 where charts from different databases were missing.
+        """
+        mock_g1.user = security_manager.find_user("admin")
+        mock_g2.user = security_manager.find_user("admin")
+
+        # Create a second database for testing
+        second_db = Database(database_name="test_db_2", sqlalchemy_uri="sqlite://")
+        db.session.add(second_db)
+
+        # Create a dataset in the second database
+        second_dataset = SqlaTable(
+            table_name="second_dataset",
+            database=second_db,
+            database_id=second_db.id,
+            columns=[],
+        )
+        db.session.add(second_dataset)
+
+        # Create a chart using the second database's dataset
+        chart_from_second_db = Slice(
+            slice_name="Chart from Second Database",
+            datasource_type="table",
+            datasource_id=second_dataset.id,
+            datasource_name=second_dataset.table_name,
+            viz_type="bar",
+            params=json.dumps({"viz_type": "bar"}),
+        )
+        db.session.add(chart_from_second_db)
+
+        # Get the example dashboard and add the new chart
+        example_dashboard = (
+            db.session.query(Dashboard).filter_by(slug="world_health").one()
+        )
+
+        # Store original charts count
+        original_charts_count = len(example_dashboard.slices)
+
+        # Add the new chart from different database to the dashboard
+        example_dashboard.slices.append(chart_from_second_db)
+        db.session.commit()
+
+        # Export the dashboard
+        command = ExportDashboardsCommand([example_dashboard.id])
+        contents = dict(command.run())
+
+        # Verify all databases are exported
+        db_files = [key for key in contents.keys() if key.startswith("databases/")]
+        assert len(db_files) >= 2, f"Expected at least 2 database files, got {db_files}"
+
+        # Verify the second database is included
+        assert "databases/test_db_2.yaml" in contents.keys(), \
+            f"Second database not found in export. Keys: {list(contents.keys())}"
+
+        # Verify all charts are exported (original + new one)
+        chart_files = [key for key in contents.keys() if key.startswith("charts/")]
+        assert len(chart_files) == original_charts_count + 1, \
+            f"Expected {original_charts_count + 1} charts, got {len(chart_files)}"
+
+        # Verify the new chart from second database is included
+        chart_from_second_db_file = None
+        for key in chart_files:
+            if f"Chart_from_Second_Database_{chart_from_second_db.id}" in key:
+                chart_from_second_db_file = key
+                break
+
+        assert chart_from_second_db_file is not None, \
+            f"Chart from second database not found in export. Chart files: {chart_files}"
+
+        # Verify the dataset from second database is included
+        dataset_files = [key for key in contents.keys() if key.startswith("datasets/")]
+        second_dataset_file = f"datasets/test_db_2/second_dataset_{second_dataset.id}.yaml"
+        assert second_dataset_file in contents.keys(), \
+            f"Second dataset not found. Dataset files: {dataset_files}"
+
+        # Clean up
+        example_dashboard.slices.remove(chart_from_second_db)
+        db.session.delete(chart_from_second_db)
+        db.session.delete(second_dataset)
+        db.session.delete(second_db)
+        db.session.commit()

Review Comment:
   The test cleanup is not exception-safe. If an assertion fails before the cleanup section runs, the objects created here (`second_db`, `second_dataset`, `chart_from_second_db`) remain in the database and can affect other tests. Wrap the assertions in try/finally, or move setup and teardown into a pytest yield fixture so cleanup runs even when the test fails; a sketch of the fixture approach follows.
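   A hypothetical yield-fixture sketch, reusing the `Database`/`SqlaTable`/`Slice`/`db` imports already present in this test module (the fixture name and structure are illustrative, not Superset's actual fixtures):
   ```python
   @pytest.fixture
   def cross_database_chart():
       """Create the second database, dataset, and chart; always clean up."""
       second_db = Database(database_name="test_db_2", sqlalchemy_uri="sqlite://")
       second_dataset = SqlaTable(table_name="second_dataset", database=second_db, columns=[])
       db.session.add_all([second_db, second_dataset])
       db.session.flush()  # assign ids before the chart references them
       chart = Slice(
           slice_name="Chart from Second Database",
           datasource_type="table",
           datasource_id=second_dataset.id,
           datasource_name=second_dataset.table_name,
           viz_type="bar",
           params=json.dumps({"viz_type": "bar"}),
       )
       db.session.add(chart)
       db.session.commit()

       yield second_db, second_dataset, chart

       # pytest runs everything after the yield as teardown, even when the
       # test body raises or an assertion fails.
       db.session.delete(chart)
       db.session.delete(second_dataset)
       db.session.delete(second_db)
       db.session.commit()
   ```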



##########
superset/commands/database/export.py:
##########
@@ -104,8 +104,12 @@ def _file_content(model: Database) -> str:
 
     @staticmethod
     def _export(
-        model: Database, export_related: bool = True
+        model: Database, export_related: bool = True, seen: set[str] | None = None
     ) -> Iterator[tuple[str, Callable[[], str]]]:
+        # Initialize seen set if not provided
+        if seen is None:
+            seen = set()

Review Comment:
   The variable `seen` is initialized here but never used in this method. Either remove the dead assignment or mark it as intentionally unused so static analysis does not flag it:
   ```suggestion
               seen = set()
           # Mark `seen` as intentionally used to satisfy static analysis
           _ = seen
   ```



##########
superset/commands/dataset/export.py:
##########
@@ -103,29 +107,33 @@ def _export(
             )
             file_path = f"databases/{db_file_name}.yaml"
 
-            payload = model.database.export_to_dict(
-                recursive=False,
-                include_parent_ref=False,
-                include_defaults=True,
-                export_uuids=True,
-            )
-            # TODO (betodealmeida): move this logic to export_to_dict once this
-            # becomes the default export endpoint
-            if payload.get("extra"):
-                try:
-                    payload["extra"] = json.loads(payload["extra"])
-                except json.JSONDecodeError:
-                    logger.info("Unable to decode `extra` field: %s", payload["extra"])
-
-            if ssh_tunnel := model.database.ssh_tunnel:
-                ssh_tunnel_payload = ssh_tunnel.export_to_dict(
+            # Only yield the database file if not already seen
+            # This is critical to fix the issue where databases were being duplicated
+            # and potentially overwritten when charts from different databases were exported
+            if file_path not in seen:
+                payload = model.database.export_to_dict(
                     recursive=False,
                     include_parent_ref=False,
                     include_defaults=True,
-                    export_uuids=False,
+                    export_uuids=True,
                 )
-                payload["ssh_tunnel"] = mask_password_info(ssh_tunnel_payload)
+                # TODO (betodealmeida): move this logic to export_to_dict once this
+                # becomes the default export endpoint
+                if payload.get("extra"):
+                    try:
+                        payload["extra"] = json.loads(payload["extra"])
+                    except json.JSONDecodeError:
+                        logger.info("Unable to decode `extra` field: %s", payload["extra"])
+
+                if ssh_tunnel := model.database.ssh_tunnel:
+                    ssh_tunnel_payload = ssh_tunnel.export_to_dict(
+                        recursive=False,
+                        include_parent_ref=False,
+                        include_defaults=True,
+                        export_uuids=False,
+                    )
+                    payload["ssh_tunnel"] = mask_password_info(ssh_tunnel_payload)
 
-            payload["version"] = EXPORT_VERSION
+                payload["version"] = EXPORT_VERSION
 
-            yield file_path, lambda: yaml.safe_dump(payload, sort_keys=False)
+                yield file_path, lambda: yaml.safe_dump(payload, sort_keys=False)

Review Comment:
   The lambda captures the variable `payload`, not its current value, because Python closures bind late. If the consumer collects several (file path, callable) pairs before invoking any of them, every callable serializes the `payload` from the last iteration. Bind the value at definition time with a default argument:
   ```suggestion
                yield file_path, lambda p=payload: yaml.safe_dump(p, sort_keys=False)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

