This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new a09bcde4 Improve `LocationProvider` unit tests (#1511)
a09bcde4 is described below

commit a09bcde43c40e0a582fbfeb1e971aa52278c99c5
Author: smaheshwar-pltr <[email protected]>
AuthorDate: Mon Jan 13 17:38:47 2025 +0000

    Improve `LocationProvider` unit tests (#1511)
    
    * Improve `LocationProvider` unit tests
    
    * Renamed `test_object_storage_injects_entropy` to test_object_storage_no_partition
    
    ---------
    
    Co-authored-by: Sreesh Maheshwar <[email protected]>
---
 tests/table/test_locations.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/tests/table/test_locations.py b/tests/table/test_locations.py
index 6753fe5a..67911b62 100644
--- a/tests/table/test_locations.py
+++ b/tests/table/test_locations.py
@@ -38,12 +38,18 @@ class CustomLocationProvider(LocationProvider):
         return f"custom_location_provider/{data_file_name}"
 
 
-def test_default_location_provider() -> None:
+def test_simple_location_provider_no_partition() -> None:
     provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "false"})
 
     assert provider.new_data_location("my_file") == "table_location/data/my_file"
 
 
+def test_simple_location_provider_with_partition() -> None:
+    provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "false"})
+
+    assert provider.new_data_location("my_file", PARTITION_KEY) == "table_location/data/string_field=example_string/my_file"
+
+
 def test_custom_location_provider() -> None:
     qualified_name = CustomLocationProvider.__module__ + "." + CustomLocationProvider.__name__
     provider = load_location_provider(
@@ -65,7 +71,7 @@ def test_custom_location_provider_not_found() -> None:
         )
 
 
-def test_object_storage_injects_entropy() -> None:
+def test_object_storage_no_partition() -> None:
     provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)
 
     location = provider.new_data_location("test.parquet")
@@ -82,19 +88,18 @@ def test_object_storage_injects_entropy() -> None:
         assert all(c in "01" for c in dir_name)
 
 
-@pytest.mark.parametrize("object_storage", [True, False])
-def test_partition_value_in_path(object_storage: bool) -> None:
+def test_object_storage_with_partition() -> None:
     provider = load_location_provider(
         table_location="table_location",
-        table_properties={
-            "write.object-storage.enabled": str(object_storage),
-        },
+        table_properties={"write.object-storage.enabled": "true"},
     )
 
     location = provider.new_data_location("test.parquet", PARTITION_KEY)
-    partition_segment = location.split("/")[-2]
 
-    assert partition_segment == "string_field=example_string"
+    # Partition values AND entropy included in the path. Entropy differs to that in the test below because the partition
+    # key AND the data file name are used as the hash input. This matches Java behaviour; the hash below is what the
+    # Java implementation produces for this input too.
+    assert location == "table_location/data/0001/0010/1001/00000011/string_field=example_string/test.parquet"
 
 
 # NB: We test here with None partition key too because disabling partitioned paths still replaces final / with - even in

Reply via email to