This is an automated email from the ASF dual-hosted git repository.
kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new a09bcde4 Improve `LocationProvider` unit tests (#1511)
a09bcde4 is described below
commit a09bcde43c40e0a582fbfeb1e971aa52278c99c5
Author: smaheshwar-pltr <[email protected]>
AuthorDate: Mon Jan 13 17:38:47 2025 +0000
Improve `LocationProvider` unit tests (#1511)
* Improve `LocationProvider` unit tests
* Renamed `test_object_storage_injects_entropy` to
test_object_storage_no_partition
---------
Co-authored-by: Sreesh Maheshwar <[email protected]>
---
tests/table/test_locations.py | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/tests/table/test_locations.py b/tests/table/test_locations.py
index 6753fe5a..67911b62 100644
--- a/tests/table/test_locations.py
+++ b/tests/table/test_locations.py
@@ -38,12 +38,18 @@ class CustomLocationProvider(LocationProvider):
return f"custom_location_provider/{data_file_name}"
-def test_default_location_provider() -> None:
+def test_simple_location_provider_no_partition() -> None:
provider = load_location_provider(table_location="table_location",
table_properties={"write.object-storage.enabled": "false"})
assert provider.new_data_location("my_file") ==
"table_location/data/my_file"
+def test_simple_location_provider_with_partition() -> None:
+ provider = load_location_provider(table_location="table_location",
table_properties={"write.object-storage.enabled": "false"})
+
+ assert provider.new_data_location("my_file", PARTITION_KEY) ==
"table_location/data/string_field=example_string/my_file"
+
+
def test_custom_location_provider() -> None:
qualified_name = CustomLocationProvider.__module__ + "." +
CustomLocationProvider.__name__
provider = load_location_provider(
@@ -65,7 +71,7 @@ def test_custom_location_provider_not_found() -> None:
)
-def test_object_storage_injects_entropy() -> None:
+def test_object_storage_no_partition() -> None:
provider = load_location_provider(table_location="table_location",
table_properties=EMPTY_DICT)
location = provider.new_data_location("test.parquet")
@@ -82,19 +88,18 @@ def test_object_storage_injects_entropy() -> None:
assert all(c in "01" for c in dir_name)
[email protected]("object_storage", [True, False])
-def test_partition_value_in_path(object_storage: bool) -> None:
+def test_object_storage_with_partition() -> None:
provider = load_location_provider(
table_location="table_location",
- table_properties={
- "write.object-storage.enabled": str(object_storage),
- },
+ table_properties={"write.object-storage.enabled": "true"},
)
location = provider.new_data_location("test.parquet", PARTITION_KEY)
- partition_segment = location.split("/")[-2]
- assert partition_segment == "string_field=example_string"
+ # Partition values AND entropy included in the path. Entropy differs to
that in the test below because the partition
+ # key AND the data file name are used as the hash input. This matches Java
behaviour; the hash below is what the
+ # Java implementation produces for this input too.
+ assert location ==
"table_location/data/0001/0010/1001/00000011/string_field=example_string/test.parquet"
# NB: We test here with None partition key too because disabling partitioned
paths still replaces final / with - even in