This is an automated email from the ASF dual-hosted git repository.

rusackas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 87bbd54d0a feat(examples): Transpile virtual dataset SQL on import 
(#37311)
87bbd54d0a is described below

commit 87bbd54d0aca3b98671e0516e0b2d7a9e66bc3c3
Author: Evan Rusackas <[email protected]>
AuthorDate: Thu Jan 22 09:50:05 2026 -0800

    feat(examples): Transpile virtual dataset SQL on import (#37311)
    
    Co-authored-by: Claude Opus 4.5 <[email protected]>
    Co-authored-by: Beto Dealmeida <[email protected]>
    Co-authored-by: bito-code-review[bot] 
<188872107+bito-code-review[bot]@users.noreply.github.com>
---
 superset/commands/dashboard/export_example.py      |   4 +
 superset/commands/importers/v1/examples.py         |  58 +++++
 superset/datasets/schemas.py                       |   2 +
 superset/sql/parse.py                              |  21 +-
 .../commands/importers/v1/examples_test.py         | 244 +++++++++++++++++++++
 tests/unit_tests/sql/transpile_to_dialect_test.py  |  51 +++++
 6 files changed, 377 insertions(+), 3 deletions(-)

diff --git a/superset/commands/dashboard/export_example.py b/superset/commands/dashboard/export_example.py
index 0446e213b3..7924fe0ad4 100644
--- a/superset/commands/dashboard/export_example.py
+++ b/superset/commands/dashboard/export_example.py
@@ -175,6 +175,10 @@ def export_dataset_yaml(
         "schema": None,  # Don't export - use target database's default schema
         # Preserve SQL for virtual datasets, None for physical (data is in parquet)
         "sql": dataset.sql if is_preserved_virtual else None,
+        # Track source database engine for SQL transpilation during import
+        "source_db_engine": (
+            dataset.database.db_engine_spec.engine if is_preserved_virtual else None
+        ),
         "params": None,  # Don't export - contains stale import metadata
         "template_params": dataset.template_params,
         "filter_select_enabled": dataset.filter_select_enabled,
diff --git a/superset/commands/importers/v1/examples.py b/superset/commands/importers/v1/examples.py
index 99ecab7955..19fe811044 100644
--- a/superset/commands/importers/v1/examples.py
+++ b/superset/commands/importers/v1/examples.py
@@ -14,11 +14,13 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import logging
 from typing import Any, Optional
 
 from marshmallow import Schema
 from sqlalchemy.exc import MultipleResultsFound
 
+from superset import db
 from superset.charts.schemas import ImportV1ChartSchema
 from superset.commands.chart.importers.v1 import ImportChartsCommand
 from superset.commands.chart.importers.v1.utils import import_chart
@@ -41,9 +43,62 @@ from superset.daos.base import BaseDAO
 from superset.dashboards.schemas import ImportV1DashboardSchema
 from superset.databases.schemas import ImportV1DatabaseSchema
 from superset.datasets.schemas import ImportV1DatasetSchema
+from superset.exceptions import QueryClauseValidationException
+from superset.models.core import Database
+from superset.sql.parse import transpile_to_dialect
 from superset.utils.core import get_example_default_schema
 from superset.utils.decorators import transaction
 
+logger = logging.getLogger(__name__)
+
+
+def transpile_virtual_dataset_sql(config: dict[str, Any], database_id: int) -> None:
+    """
+    Transpile virtual dataset SQL to the target database dialect.
+
+    This ensures that virtual datasets exported from one database type
+    (e.g., PostgreSQL) can be loaded into a different database type
+    (e.g., MySQL, DuckDB, SQLite).
+
+    Args:
+        config: Dataset configuration dict (modified in place)
+        database_id: ID of the target database
+    """
+    sql = config.get("sql")
+    if not sql:
+        return
+
+    database = db.session.query(Database).get(database_id)
+    if not database:
+        logger.warning("Database %s not found, skipping SQL transpilation", database_id)
+        return
+
+    target_engine = database.db_engine_spec.engine
+    source_engine = config.get("source_db_engine")
+    if target_engine == source_engine:
+        logger.info("Source and target dialects are identical, skipping transpilation")
+        return
+
+    try:
+        transpiled_sql = transpile_to_dialect(sql, target_engine, source_engine)
+        if transpiled_sql != sql:
+            logger.info(
+                "Transpiled virtual dataset SQL for '%s' from %s to %s dialect",
+                config.get("table_name", "unknown"),
+                source_engine or "generic",
+                target_engine,
+            )
+            config["sql"] = transpiled_sql
+    except QueryClauseValidationException as ex:
+        logger.warning(
+            "Could not transpile SQL for dataset '%s' from %s to %s: %s. "
+            "Using original SQL which may not be compatible.",
+            config.get("table_name", "unknown"),
+            source_engine or "generic",
+            target_engine,
+            ex,
+        )
+
 
 class ImportExamplesCommand(ImportModelsCommand):
     """Import examples"""
@@ -119,6 +174,9 @@ class ImportExamplesCommand(ImportModelsCommand):
                 if config["schema"] is None:
                     config["schema"] = get_example_default_schema()
 
+                # transpile virtual dataset SQL to target database dialect
+                transpile_virtual_dataset_sql(config, config["database_id"])
+
                 try:
                     dataset = import_dataset(
                         config,
diff --git a/superset/datasets/schemas.py b/superset/datasets/schemas.py
index 96bc44a9d3..1506ef45d1 100644
--- a/superset/datasets/schemas.py
+++ b/superset/datasets/schemas.py
@@ -322,6 +322,8 @@ class ImportV1DatasetSchema(Schema):
     schema = fields.String(allow_none=True)
     catalog = fields.String(allow_none=True)
     sql = fields.String(allow_none=True)
+    # Source database engine for SQL transpilation (virtual datasets only)
+    source_db_engine = fields.String(allow_none=True, load_default=None)
     params = fields.Dict(allow_none=True)
     template_params = fields.Dict(allow_none=True)
     filter_select_enabled = fields.Boolean()
diff --git a/superset/sql/parse.py b/superset/sql/parse.py
index af72f72e95..af9a740ec7 100644
--- a/superset/sql/parse.py
+++ b/superset/sql/parse.py
@@ -1522,9 +1522,21 @@ def sanitize_clause(clause: str, engine: str) -> str:
         raise QueryClauseValidationException(f"Invalid SQL clause: {clause}") from ex
 
 
-def transpile_to_dialect(sql: str, target_engine: str) -> str:
+def transpile_to_dialect(
+    sql: str,
+    target_engine: str,
+    source_engine: str | None = None,
+) -> str:
     """
-    Transpile SQL from "generic SQL" to the target database dialect using SQLGlot.
+    Transpile SQL from one database dialect to another using SQLGlot.
+
+    Args:
+        sql: The SQL query to transpile
+        target_engine: The target database engine (e.g., "mysql", "postgresql")
+        source_engine: The source database engine. If None, uses generic SQL dialect.
+
+    Returns:
+        The transpiled SQL string
 
     If the target engine is not in SQLGLOT_DIALECTS, returns the SQL as-is.
     """
@@ -1534,8 +1546,11 @@ def transpile_to_dialect(sql: str, target_engine: str) -> str:
     if target_dialect is None:
         return sql
 
+    # Get source dialect (default to generic if not specified)
+    source_dialect = SQLGLOT_DIALECTS.get(source_engine) if source_engine else Dialect
+
     try:
-        parsed = sqlglot.parse_one(sql, dialect=Dialect)
+        parsed = sqlglot.parse_one(sql, dialect=source_dialect)
         return Dialect.get_or_raise(target_dialect).generate(
             parsed,
             copy=True,
diff --git a/tests/unit_tests/commands/importers/v1/examples_test.py b/tests/unit_tests/commands/importers/v1/examples_test.py
new file mode 100644
index 0000000000..1ad6176dc1
--- /dev/null
+++ b/tests/unit_tests/commands/importers/v1/examples_test.py
@@ -0,0 +1,244 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Tests for the examples importer, specifically SQL transpilation."""
+
+from unittest.mock import MagicMock, patch
+
+from superset.commands.importers.v1.examples import transpile_virtual_dataset_sql
+
+
+def test_transpile_virtual_dataset_sql_no_sql():
+    """Test that configs without SQL are unchanged."""
+    config = {"table_name": "my_table", "sql": None}
+    transpile_virtual_dataset_sql(config, 1)
+    assert config["sql"] is None
+
+
+def test_transpile_virtual_dataset_sql_empty_sql():
+    """Test that configs with empty SQL are unchanged."""
+    config = {"table_name": "my_table", "sql": ""}
+    transpile_virtual_dataset_sql(config, 1)
+    assert config["sql"] == ""
+
+
+@patch("superset.commands.importers.v1.examples.db")
+def test_transpile_virtual_dataset_sql_database_not_found(mock_db):
+    """Test graceful handling when database is not found."""
+    mock_db.session.query.return_value.get.return_value = None
+
+    config = {"table_name": "my_table", "sql": "SELECT * FROM foo"}
+    original_sql = config["sql"]
+
+    transpile_virtual_dataset_sql(config, 999)
+
+    # SQL should remain unchanged
+    assert config["sql"] == original_sql
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_success(mock_transpile, mock_db):
+    """Test successful SQL transpilation with source engine."""
+    mock_database = MagicMock()
+    mock_database.db_engine_spec.engine = "mysql"
+    mock_db.session.query.return_value.get.return_value = mock_database
+
+    mock_transpile.return_value = "SELECT * FROM `foo`"
+
+    config = {
+        "table_name": "my_table",
+        "sql": "SELECT * FROM foo",
+        "source_db_engine": "postgresql",
+    }
+    transpile_virtual_dataset_sql(config, 1)
+
+    assert config["sql"] == "SELECT * FROM `foo`"
+    mock_transpile.assert_called_once_with("SELECT * FROM foo", "mysql", "postgresql")
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_no_source_engine(mock_transpile, 
mock_db):
+    """Test transpilation when source_db_engine is not specified (legacy)."""
+    mock_database = MagicMock()
+    mock_database.db_engine_spec.engine = "mysql"
+    mock_db.session.query.return_value.get.return_value = mock_database
+
+    mock_transpile.return_value = "SELECT * FROM `foo`"
+
+    # No source_db_engine - should default to None (generic dialect)
+    config = {"table_name": "my_table", "sql": "SELECT * FROM foo"}
+    transpile_virtual_dataset_sql(config, 1)
+
+    assert config["sql"] == "SELECT * FROM `foo`"
+    mock_transpile.assert_called_once_with("SELECT * FROM foo", "mysql", None)
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_no_change(mock_transpile, mock_db):
+    """Test when transpilation returns same SQL (no dialect differences)."""
+    mock_database = MagicMock()
+    mock_database.db_engine_spec.engine = "postgresql"
+    mock_db.session.query.return_value.get.return_value = mock_database
+
+    original_sql = "SELECT * FROM foo"
+    mock_transpile.return_value = original_sql
+
+    config = {
+        "table_name": "my_table",
+        "sql": original_sql,
+        "source_db_engine": "postgresql",
+    }
+    transpile_virtual_dataset_sql(config, 1)
+
+    assert config["sql"] == original_sql
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_error_fallback(mock_transpile, mock_db):
+    """Test graceful fallback when transpilation fails."""
+    from superset.exceptions import QueryClauseValidationException
+
+    mock_database = MagicMock()
+    mock_database.db_engine_spec.engine = "mysql"
+    mock_db.session.query.return_value.get.return_value = mock_database
+
+    mock_transpile.side_effect = QueryClauseValidationException("Parse error")
+
+    original_sql = "SELECT SOME_POSTGRES_SPECIFIC_FUNCTION() FROM foo"
+    config = {
+        "table_name": "my_table",
+        "sql": original_sql,
+        "source_db_engine": "postgresql",
+    }
+
+    # Should not raise, should keep original SQL
+    transpile_virtual_dataset_sql(config, 1)
+    assert config["sql"] == original_sql
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_postgres_to_duckdb(mock_transpile, 
mock_db):
+    """Test transpilation from PostgreSQL to DuckDB."""
+    mock_database = MagicMock()
+    mock_database.db_engine_spec.engine = "duckdb"
+    mock_db.session.query.return_value.get.return_value = mock_database
+
+    original_sql = """
+        SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt
+        FROM orders WHERE status = 'completed' GROUP BY 1
+    """
+    transpiled_sql = """
+        SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt
+        FROM orders WHERE status = 'completed' GROUP BY 1
+    """
+    mock_transpile.return_value = transpiled_sql
+
+    config = {
+        "table_name": "monthly_orders",
+        "sql": original_sql,
+        "source_db_engine": "postgresql",
+    }
+    transpile_virtual_dataset_sql(config, 1)
+
+    assert config["sql"] == transpiled_sql
+    mock_transpile.assert_called_once_with(original_sql, "duckdb", 
"postgresql")
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_postgres_to_clickhouse(mock_transpile, 
mock_db):
+    """Test transpilation from PostgreSQL to ClickHouse.
+
+    ClickHouse has different syntax for date functions, so this tests
+    real dialect differences.
+    """
+    mock_database = MagicMock()
+    mock_database.db_engine_spec.engine = "clickhouse"
+    mock_db.session.query.return_value.get.return_value = mock_database
+
+    # PostgreSQL syntax
+    original_sql = "SELECT DATE_TRUNC('month', created_at) AS month FROM 
orders"
+    # ClickHouse uses toStartOfMonth instead
+    transpiled_sql = "SELECT toStartOfMonth(created_at) AS month FROM orders"
+    mock_transpile.return_value = transpiled_sql
+
+    config = {
+        "table_name": "monthly_orders",
+        "sql": original_sql,
+        "source_db_engine": "postgresql",
+    }
+    transpile_virtual_dataset_sql(config, 1)
+
+    assert config["sql"] == transpiled_sql
+    mock_transpile.assert_called_once_with(original_sql, "clickhouse", 
"postgresql")
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_postgres_to_mysql(mock_transpile, 
mock_db):
+    """Test transpilation from PostgreSQL to MySQL.
+
+    MySQL uses backticks for identifiers and has different casting syntax.
+    """
+    mock_database = MagicMock()
+    mock_database.db_engine_spec.engine = "mysql"
+    mock_db.session.query.return_value.get.return_value = mock_database
+
+    # PostgreSQL syntax with :: casting
+    original_sql = "SELECT created_at::DATE AS date_only FROM orders"
+    # MySQL syntax with CAST
+    transpiled_sql = "SELECT CAST(created_at AS DATE) AS date_only FROM 
`orders`"
+    mock_transpile.return_value = transpiled_sql
+
+    config = {
+        "table_name": "orders_dates",
+        "sql": original_sql,
+        "source_db_engine": "postgresql",
+    }
+    transpile_virtual_dataset_sql(config, 1)
+
+    assert config["sql"] == transpiled_sql
+    mock_transpile.assert_called_once_with(original_sql, "mysql", "postgresql")
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_postgres_to_sqlite(mock_transpile, 
mock_db):
+    """Test transpilation from PostgreSQL to SQLite."""
+    mock_database = MagicMock()
+    mock_database.db_engine_spec.engine = "sqlite"
+    mock_db.session.query.return_value.get.return_value = mock_database
+
+    original_sql = "SELECT * FROM orders WHERE created_at > NOW() - INTERVAL 
'7 days'"
+    transpiled_sql = (
+        "SELECT * FROM orders WHERE created_at > DATETIME('now', '-7 days')"
+    )
+    mock_transpile.return_value = transpiled_sql
+
+    config = {
+        "table_name": "recent_orders",
+        "sql": original_sql,
+        "source_db_engine": "postgresql",
+    }
+    transpile_virtual_dataset_sql(config, 1)
+
+    assert config["sql"] == transpiled_sql
+    mock_transpile.assert_called_once_with(original_sql, "sqlite", 
"postgresql")
diff --git a/tests/unit_tests/sql/transpile_to_dialect_test.py b/tests/unit_tests/sql/transpile_to_dialect_test.py
index 1327b09009..5a11e501fa 100644
--- a/tests/unit_tests/sql/transpile_to_dialect_test.py
+++ b/tests/unit_tests/sql/transpile_to_dialect_test.py
@@ -345,3 +345,54 @@ def test_sqlglot_generation_error_raises_exception() -> 
None:
                 match="Cannot transpile SQL to postgresql",
             ):
                 transpile_to_dialect("name = 'test'", "postgresql")
+
+
+# Tests for source_engine parameter
[email protected](
+    ("sql", "source_engine", "target_engine", "expected"),
+    [
+        # PostgreSQL to MySQL - should convert :: casting to CAST()
+        (
+            "SELECT created_at::DATE FROM orders",
+            "postgresql",
+            "mysql",
+            "SELECT CAST(created_at AS DATE) FROM orders",
+        ),
+        # Same dialect - should preserve SQL
+        (
+            "SELECT * FROM orders",
+            "postgresql",
+            "postgresql",
+            "SELECT * FROM orders",
+        ),
+        # PostgreSQL to DuckDB - DuckDB supports similar syntax (uppercases 
date part)
+        (
+            "SELECT DATE_TRUNC('month', ts) FROM orders",
+            "postgresql",
+            "duckdb",
+            "SELECT DATE_TRUNC('MONTH', ts) FROM orders",
+        ),
+    ],
+)
+def test_transpile_with_source_engine(
+    sql: str, source_engine: str, target_engine: str, expected: str
+) -> None:
+    """Test transpilation with explicit source engine."""
+    result = transpile_to_dialect(sql, target_engine, source_engine)
+    assert result == expected
+
+
+def test_transpile_source_engine_none_uses_generic() -> None:
+    """Test that source_engine=None uses generic dialect (backward 
compatible)."""
+    # Simple SQL that doesn't require dialect-specific parsing
+    result = transpile_to_dialect("SELECT * FROM orders", "postgresql", None)
+    assert result == "SELECT * FROM orders"
+
+
+def test_transpile_unknown_source_engine_uses_generic() -> None:
+    """Test that unknown source_engine falls back to generic dialect."""
+    # Unknown engine should be treated as None (generic)
+    result = transpile_to_dialect(
+        "SELECT * FROM orders", "postgresql", "unknown_engine"
+    )
+    assert result == "SELECT * FROM orders"

Reply via email to