This is an automated email from the ASF dual-hosted git repository. jli pushed a commit to branch fix-examples-not-loading in repository https://gitbox.apache.org/repos/asf/superset.git
commit 8e20e38b9a19614f6769a2feb6876cd82d8ff6c9 Author: Joe Li <[email protected]> AuthorDate: Thu Jan 29 16:00:13 2026 -0800 test(examples): add comprehensive tests for UUID/schema handling Adds 13 new tests covering all Codex-suggested cases: data_loading_test.py (5 new): - test_get_dataset_config_from_yaml_schema_main: schema "main" → None - test_get_dataset_config_from_yaml_empty_file: Empty YAML handling - test_get_dataset_config_from_yaml_invalid_yaml: Invalid YAML handling - test_get_multi_dataset_config_schema_main: schema "main" in multi-dataset - test_get_multi_dataset_config_missing_table_name: Falls back to dataset_name generic_loader_test.py (8 new): - test_find_dataset_no_uuid_no_schema: Basic lookup without UUID/schema - test_find_dataset_not_found: Returns (None, False) when nothing matches - test_load_parquet_table_no_backfill_when_uuid_already_set: Preserve UUID - test_load_parquet_table_no_backfill_when_schema_already_set: Preserve schema - test_load_parquet_table_both_uuid_and_schema_backfill: Backfill both - test_create_generic_loader_passes_schema: Schema propagation - test_create_generic_loader_description_set: Description applied - test_create_generic_loader_no_description: No description path Total: 32 tests covering UUID/schema extraction, lookup, backfill, preservation. Co-Authored-By: Claude Opus 4.5 <[email protected]> --- tests/unit_tests/examples/data_loading_test.py | 101 ++++++++++ tests/unit_tests/examples/generic_loader_test.py | 226 +++++++++++++++++++++++ 2 files changed, 327 insertions(+) diff --git a/tests/unit_tests/examples/data_loading_test.py b/tests/unit_tests/examples/data_loading_test.py index f8d934d11d2..61128d99589 100644 --- a/tests/unit_tests/examples/data_loading_test.py +++ b/tests/unit_tests/examples/data_loading_test.py @@ -69,6 +69,59 @@ def test_get_dataset_config_from_yaml_handles_missing_file(tmp_path: Path) -> No assert result["schema"] is None +def test_get_dataset_config_from_yaml_schema_main(tmp_path: Path) -> None: + """Test that schema: 'main' (SQLite default) becomes None.""" + from superset.examples.data_loading import get_dataset_config_from_yaml + + yaml_content = """ +table_name: test_table +schema: main +uuid: test-uuid-1234 +""" + dataset_yaml = tmp_path / "dataset.yaml" + dataset_yaml.write_text(yaml_content) + + result = get_dataset_config_from_yaml(tmp_path) + + # SQLite's 'main' schema should be treated as None + assert result["schema"] is None + assert result["table_name"] == "test_table" + assert result["uuid"] == "test-uuid-1234" + + +def test_get_dataset_config_from_yaml_empty_file(tmp_path: Path) -> None: + """Test that empty YAML file returns None for all fields.""" + from superset.examples.data_loading import get_dataset_config_from_yaml + + # Create empty dataset.yaml + dataset_yaml = tmp_path / "dataset.yaml" + dataset_yaml.write_text("") + + result = get_dataset_config_from_yaml(tmp_path) + + assert result["uuid"] is None + assert result["table_name"] is None + assert result["schema"] is None + assert result["data_file"] is None + + +def test_get_dataset_config_from_yaml_invalid_yaml(tmp_path: Path) -> None: + """Test that invalid YAML returns defaults (exception is caught internally).""" + from superset.examples.data_loading import get_dataset_config_from_yaml + + # Create invalid YAML (unclosed bracket) + dataset_yaml = tmp_path / "dataset.yaml" + dataset_yaml.write_text("table_name: [unclosed") + + # Function catches exceptions and returns defaults + result = get_dataset_config_from_yaml(tmp_path) + + assert result["uuid"] is None + assert result["table_name"] is None + assert result["schema"] is None + assert result["data_file"] is None + + def test_get_multi_dataset_config_extracts_uuid(tmp_path: Path) -> None: """Test that _get_multi_dataset_config extracts UUID from datasets/*.yaml.""" from superset.examples.data_loading import _get_multi_dataset_config @@ -127,3 +180,51 @@ def test_get_multi_dataset_config_handles_missing_file(tmp_path: Path) -> None: assert result["uuid"] is None # Falls back to dataset_name when no YAML assert result["table_name"] == "my_dataset" + + +def test_get_multi_dataset_config_schema_main(tmp_path: Path) -> None: + """Test that schema: 'main' becomes None in multi-dataset config.""" + from superset.examples.data_loading import _get_multi_dataset_config + + datasets_dir = tmp_path / "datasets" + datasets_dir.mkdir() + + yaml_content = """ +table_name: my_dataset +schema: main +uuid: test-uuid-1234 +""" + dataset_yaml = datasets_dir / "my_dataset.yaml" + dataset_yaml.write_text(yaml_content) + + data_file = tmp_path / "data" / "my_dataset.parquet" + + result = _get_multi_dataset_config(tmp_path, "my_dataset", data_file) + + # SQLite's 'main' schema should be treated as None + assert result["schema"] is None + assert result["uuid"] == "test-uuid-1234" + + +def test_get_multi_dataset_config_missing_table_name(tmp_path: Path) -> None: + """Test that missing table_name falls back to dataset_name.""" + from superset.examples.data_loading import _get_multi_dataset_config + + datasets_dir = tmp_path / "datasets" + datasets_dir.mkdir() + + # YAML without table_name + yaml_content = """ +schema: public +uuid: test-uuid-5678 +""" + dataset_yaml = datasets_dir / "my_dataset.yaml" + dataset_yaml.write_text(yaml_content) + + data_file = tmp_path / "data" / "my_dataset.parquet" + + result = _get_multi_dataset_config(tmp_path, "my_dataset", data_file) + + # Falls back to dataset_name when table_name not in YAML + assert result["table_name"] == "my_dataset" + assert result["uuid"] == "test-uuid-5678" diff --git a/tests/unit_tests/examples/generic_loader_test.py b/tests/unit_tests/examples/generic_loader_test.py index 08e5aaac045..3813fb3031a 100644 --- a/tests/unit_tests/examples/generic_loader_test.py +++ b/tests/unit_tests/examples/generic_loader_test.py @@ -503,3 +503,229 @@ def test_find_dataset_distinguishes_schemas(mock_db: MagicMock) -> None: assert result is schema_b_row assert found_by_uuid is False assert result.schema == "schema_b" + + +@patch("superset.examples.generic_loader.db") +def test_find_dataset_no_uuid_no_schema(mock_db: MagicMock) -> None: + """Test _find_dataset with no UUID and no schema (basic lookup).""" + from superset.examples.generic_loader import _find_dataset + + basic_row = MagicMock() + basic_row.uuid = None + basic_row.table_name = "test_table" + basic_row.schema = None + + # No UUID provided, so skip UUID lookup; table_name+database_id lookup returns row + mock_db.session.query.return_value.filter_by.return_value.first.return_value = ( + basic_row + ) + + result, found_by_uuid = _find_dataset("test_table", 1, None, None) + + assert result is basic_row + assert found_by_uuid is False + + +@patch("superset.examples.generic_loader.db") +@patch("superset.examples.generic_loader.get_example_database") +def test_load_parquet_table_no_backfill_when_uuid_already_set( + mock_get_db: MagicMock, + mock_db: MagicMock, +) -> None: + """Test that existing UUID is preserved (not overwritten) during backfill.""" + from superset.examples.generic_loader import load_parquet_table + + mock_database = MagicMock() + mock_inspector = _setup_database_mocks(mock_get_db, mock_database, has_table=True) + + with patch("superset.examples.generic_loader.inspect") as mock_inspect: + mock_inspect.return_value = mock_inspector + + # Existing table already has a UUID set + mock_existing_table = MagicMock() + mock_existing_table.uuid = "existing-uuid-1234" + mock_existing_table.schema = "public" + mock_existing_table.table_name = "test_table" + + mock_db.session.query.return_value.filter_by.return_value.first.return_value = ( + mock_existing_table + ) + + result = load_parquet_table( + parquet_file="test_data", + table_name="test_table", + database=mock_database, + only_metadata=True, + uuid="new-uuid-5678", # Try to set a different UUID + ) + + # Existing UUID should be preserved, not overwritten + assert result.uuid == "existing-uuid-1234" + + +@patch("superset.examples.generic_loader.db") +@patch("superset.examples.generic_loader.get_example_database") +def test_load_parquet_table_no_backfill_when_schema_already_set( + mock_get_db: MagicMock, + mock_db: MagicMock, +) -> None: + """Test that existing schema is preserved (not overwritten) during backfill.""" + from superset.examples.generic_loader import load_parquet_table + + mock_database = MagicMock() + mock_inspector = _setup_database_mocks(mock_get_db, mock_database, has_table=True) + + with patch("superset.examples.generic_loader.inspect") as mock_inspect: + mock_inspect.return_value = mock_inspector + + # Existing table already has a schema set + mock_existing_table = MagicMock() + mock_existing_table.uuid = "some-uuid" + mock_existing_table.schema = "existing_schema" + mock_existing_table.table_name = "test_table" + + mock_db.session.query.return_value.filter_by.return_value.first.return_value = ( + mock_existing_table + ) + + result = load_parquet_table( + parquet_file="test_data", + table_name="test_table", + database=mock_database, + only_metadata=True, + schema="new_schema", # Try to set a different schema + ) + + # Existing schema should be preserved, not overwritten + assert result.schema == "existing_schema" + + +@patch("superset.examples.generic_loader.db") +@patch("superset.examples.generic_loader.get_example_database") +def test_load_parquet_table_both_uuid_and_schema_backfill( + mock_get_db: MagicMock, + mock_db: MagicMock, +) -> None: + """Test that both UUID and schema are backfilled in a single call.""" + from superset.examples.generic_loader import load_parquet_table + + mock_database = MagicMock() + mock_inspector = _setup_database_mocks(mock_get_db, mock_database, has_table=True) + + with patch("superset.examples.generic_loader.inspect") as mock_inspect: + mock_inspect.return_value = mock_inspector + + # Existing table with neither UUID nor schema + mock_existing_table = MagicMock() + mock_existing_table.uuid = None + mock_existing_table.schema = None + mock_existing_table.table_name = "test_table" + + # UUID lookup returns None, table_name lookup returns the table + def filter_by_side_effect(**kwargs): + mock_result = MagicMock() + if "uuid" in kwargs: + mock_result.first.return_value = None + else: + mock_result.first.return_value = mock_existing_table + return mock_result + + mock_db.session.query.return_value.filter_by.side_effect = filter_by_side_effect + + result = load_parquet_table( + parquet_file="test_data", + table_name="test_table", + database=mock_database, + only_metadata=True, + uuid="new-uuid", + schema="new_schema", + ) + + # Both should be backfilled + assert result.uuid == "new-uuid" + assert result.schema == "new_schema" + + +def test_create_generic_loader_passes_schema() -> None: + """Test that create_generic_loader passes schema to load_parquet_table.""" + from superset.examples.generic_loader import create_generic_loader + + test_schema = "custom_schema" + loader = create_generic_loader( + parquet_file="test_data", + table_name="test_table", + schema=test_schema, + ) + + assert loader is not None + assert callable(loader) + assert loader.__name__ == "load_test_data" + + +@patch("superset.examples.generic_loader.db") +def test_find_dataset_not_found(mock_db: MagicMock) -> None: + """Test that _find_dataset returns (None, False) when nothing matches.""" + from superset.examples.generic_loader import _find_dataset + + # Both UUID and table_name lookups return None + mock_db.session.query.return_value.filter_by.return_value.first.return_value = None + + result, found_by_uuid = _find_dataset( + "nonexistent_table", 999, "nonexistent-uuid", "public" + ) + + assert result is None + assert found_by_uuid is False + + +@patch("superset.examples.generic_loader.db") +@patch("superset.examples.generic_loader.load_parquet_table") +def test_create_generic_loader_description_set( + mock_load_parquet: MagicMock, + mock_db: MagicMock, +) -> None: + """Test that create_generic_loader applies description to the dataset.""" + from superset.examples.generic_loader import create_generic_loader + + mock_tbl = MagicMock() + mock_load_parquet.return_value = mock_tbl + + loader = create_generic_loader( + parquet_file="test_data", + table_name="test_table", + description="Test dataset description", + ) + + # Call the loader (type annotation in create_generic_loader is incorrect) + loader(True, False, False) # type: ignore[call-arg,arg-type] + + # Verify description was set + assert mock_tbl.description == "Test dataset description" + mock_db.session.merge.assert_called_with(mock_tbl) + mock_db.session.commit.assert_called() + + +@patch("superset.examples.generic_loader.db") +@patch("superset.examples.generic_loader.load_parquet_table") +def test_create_generic_loader_no_description( + mock_load_parquet: MagicMock, + mock_db: MagicMock, +) -> None: + """Test that create_generic_loader skips description update when None.""" + from superset.examples.generic_loader import create_generic_loader + + mock_tbl = MagicMock() + mock_load_parquet.return_value = mock_tbl + + loader = create_generic_loader( + parquet_file="test_data", + table_name="test_table", + description=None, # No description + ) + + # Call the loader (type annotation in create_generic_loader is incorrect) + loader(True, False, False) # type: ignore[call-arg,arg-type] + + # Verify description was NOT set (no extra commit for description) + # The key is that tbl.description should not be assigned + assert not hasattr(mock_tbl, "description") or mock_tbl.description != "anything"
