This is an automated email from the ASF dual-hosted git repository.

skrawcz pushed a commit to branch stefan/fix-unit-tests
in repository https://gitbox.apache.org/repos/asf/hamilton.git

commit 5058835d8f72db9da2c37ef53f3c83af8fc0f28b
Author: Stefan Krawczyk <[email protected]>
AuthorDate: Mon Dec 29 22:30:53 2025 +1100

    Fixes hugging face test
    
    This changes things so that we compare things
    in an order invariant way, but also handles the newer
    hugging face library change.
---
 tests/plugins/test_huggingface_extensions.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tests/plugins/test_huggingface_extensions.py b/tests/plugins/test_huggingface_extensions.py
index 7b52446a..c3f4b044 100644
--- a/tests/plugins/test_huggingface_extensions.py
+++ b/tests/plugins/test_huggingface_extensions.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import json
 import pathlib
 
 import lancedb
@@ -24,6 +25,11 @@ from datasets import Dataset, DatasetDict
 from hamilton.plugins import huggingface_extensions
 
 
+def _normalize_for_comparison(d):
+    """Normalize dictionary/list for order-independent comparison using JSON 
serialization."""
+    return json.loads(json.dumps(d, sort_keys=True, default=str))
+
+
 def test_hfds_loader():
     path_to_test = "tests/resources/hf_datasets"
     reader = huggingface_extensions.HuggingFaceDSLoader(path_to_test)
@@ -62,18 +68,23 @@ def test_hfds_lancedb_saver(tmp_path: pathlib.Path):
     saver = huggingface_extensions.HuggingFaceDSLanceDBSaver(db_client, 
"test_table")
     ds = Dataset.from_dict({"vector": [np.array([1.0, 2.0, 3.0])], 
"named_entities": ["a"]})
     metadata = saver.save_data(ds)
-    assert metadata == {
+
+    expected_metadata = {
+        "db_meta": {"table_name": "test_table"},
         "dataset_metadata": {
             "columns": ["vector", "named_entities"],
             "features": {
+                "vector": {"_type": "Sequence", "feature": {"_type": "Value", 
"dtype": "float64"}},
                 "named_entities": {"_type": "Value", "dtype": "string"},
-                "vector": {"_type": "List", "feature": {"_type": "Value", 
"dtype": "float64"}},
             },
             "rows": 1,
             "size_in_bytes": None,
         },
-        "db_meta": {"table_name": "test_table"},
     }
+
+    # Normalize both dictionaries for order-independent comparison using JSON
+    assert _normalize_for_comparison(metadata) == 
_normalize_for_comparison(expected_metadata)
+
     assert db_client.open_table("test_table").search().to_list() == [
         {"named_entities": "a", "vector": [1.0, 2.0, 3.0]}
     ]

Reply via email to