This is an automated email from the ASF dual-hosted git repository. skrawcz pushed a commit to branch stefan/fix-unit-tests in repository https://gitbox.apache.org/repos/asf/hamilton.git
commit 5058835d8f72db9da2c37ef53f3c83af8fc0f28b Author: Stefan Krawczyk <[email protected]> AuthorDate: Mon Dec 29 22:30:53 2025 +1100 Fixes hugging face test This changes things so that we compare things in an order-invariant way, but also handles the newer hugging face library change. --- tests/plugins/test_huggingface_extensions.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/plugins/test_huggingface_extensions.py b/tests/plugins/test_huggingface_extensions.py index 7b52446a..c3f4b044 100644 --- a/tests/plugins/test_huggingface_extensions.py +++ b/tests/plugins/test_huggingface_extensions.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +import json import pathlib import lancedb @@ -24,6 +25,11 @@ from datasets import Dataset, DatasetDict from hamilton.plugins import huggingface_extensions +def _normalize_for_comparison(d): + """Normalize dictionary/list for order-independent comparison using JSON serialization.""" + return json.loads(json.dumps(d, sort_keys=True, default=str)) + + def test_hfds_loader(): path_to_test = "tests/resources/hf_datasets" reader = huggingface_extensions.HuggingFaceDSLoader(path_to_test) @@ -62,18 +68,23 @@ def test_hfds_lancedb_saver(tmp_path: pathlib.Path): saver = huggingface_extensions.HuggingFaceDSLanceDBSaver(db_client, "test_table") ds = Dataset.from_dict({"vector": [np.array([1.0, 2.0, 3.0])], "named_entities": ["a"]}) metadata = saver.save_data(ds) - assert metadata == { + + expected_metadata = { + "db_meta": {"table_name": "test_table"}, "dataset_metadata": { "columns": ["vector", "named_entities"], "features": { + "vector": {"_type": "Sequence", "feature": {"_type": "Value", "dtype": "float64"}}, "named_entities": {"_type": "Value", "dtype": "string"}, - "vector": {"_type": "List", "feature": {"_type": "Value", "dtype": "float64"}}, }, "rows": 1, "size_in_bytes": None, }, - "db_meta": {"table_name": "test_table"}, } + + # 
Normalize both dictionaries for order-independent comparison using JSON + assert _normalize_for_comparison(metadata) == _normalize_for_comparison(expected_metadata) + assert db_client.open_table("test_table").search().to_list() == [ {"named_entities": "a", "vector": [1.0, 2.0, 3.0]} ]
