This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 1d24e710 Turn `ObjectStoreLocationProvider` off by default (#1722)
1d24e710 is described below

commit 1d24e71041e35e26e126aa4508ed7384e8aa031c
Author: Kevin Liu <[email protected]>
AuthorDate: Tue Feb 25 17:36:53 2025 -0500

    Turn `ObjectStoreLocationProvider` off by default (#1722)
    
    Closes #1721
    
    Otherwise, there's a default behavior change in new 0.9.0 release.
    Previous versions will write to `data/`, new version will write to
    `data/<hash>/`
---
 pyiceberg/table/__init__.py                              |  2 +-
 tests/integration/test_writes/test_partitioned_writes.py |  9 +++++----
 tests/table/test_locations.py                            | 10 +++++++---
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index e625b848..679f74d1 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -202,7 +202,7 @@ class TableProperties:
     WRITE_PY_LOCATION_PROVIDER_IMPL = "write.py-location-provider.impl"
 
     OBJECT_STORE_ENABLED = "write.object-storage.enabled"
-    OBJECT_STORE_ENABLED_DEFAULT = True
+    OBJECT_STORE_ENABLED_DEFAULT = False
 
     WRITE_OBJECT_STORE_PARTITIONED_PATHS = 
"write.object-storage.partitioned-paths"
     WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT = True
diff --git a/tests/integration/test_writes/test_partitioned_writes.py 
b/tests/integration/test_writes/test_partitioned_writes.py
index 1e6ea1b7..a299036e 100644
--- a/tests/integration/test_writes/test_partitioned_writes.py
+++ b/tests/integration/test_writes/test_partitioned_writes.py
@@ -294,13 +294,14 @@ def 
test_object_storage_location_provider_excludes_partition_path(
         PartitionField(source_id=nested_field.field_id, field_id=1001, 
transform=IdentityTransform(), name=part_col)
     )
 
-    # write.object-storage.enabled and write.object-storage.partitioned-paths 
don't need to be specified as they're on by default
-    assert TableProperties.OBJECT_STORE_ENABLED_DEFAULT
-    assert TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT
+    # Enable `write.object-storage.enabled` which is False by default
+    # `write.object-storage.partitioned-paths` is True by default
+    assert TableProperties.OBJECT_STORE_ENABLED_DEFAULT is False
+    assert TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT is True
     tbl = _create_table(
         session_catalog=session_catalog,
         
identifier=f"default.arrow_table_v{format_version}_with_null_partitioned_on_col_{part_col}",
-        properties={"format-version": str(format_version)},
+        properties={"format-version": str(format_version), 
TableProperties.OBJECT_STORE_ENABLED: True},
         data=[arrow_table_with_null],
         partition_spec=partition_spec,
     )
diff --git a/tests/table/test_locations.py b/tests/table/test_locations.py
index d66bf187..4efa6432 100644
--- a/tests/table/test_locations.py
+++ b/tests/table/test_locations.py
@@ -74,7 +74,7 @@ def test_custom_location_provider_not_found(caplog: Any) -> 
None:
 
 
 def test_object_storage_no_partition() -> None:
-    provider = load_location_provider(table_location="table_location", 
table_properties=EMPTY_DICT)
+    provider = load_location_provider(table_location="table_location", 
table_properties={"write.object-storage.enabled": "true"})
 
     location = provider.new_data_location("test.parquet")
     parts = location.split("/")
@@ -111,6 +111,7 @@ def 
test_object_storage_partitioned_paths_disabled(partition_key: Optional[Parti
     provider = load_location_provider(
         table_location="table_location",
         table_properties={
+            "write.object-storage.enabled": "true",
             "write.object-storage.partitioned-paths": "false",
         },
     )
@@ -131,7 +132,7 @@ def 
test_object_storage_partitioned_paths_disabled(partition_key: Optional[Parti
     ],
 )
 def test_hash_injection(data_file_name: str, expected_hash: str) -> None:
-    provider = load_location_provider(table_location="table_location", 
table_properties=EMPTY_DICT)
+    provider = load_location_provider(table_location="table_location", 
table_properties={"write.object-storage.enabled": "true"})
 
     assert provider.new_data_location(data_file_name) == 
f"table_location/data/{expected_hash}/{data_file_name}"
 
@@ -139,7 +140,10 @@ def test_hash_injection(data_file_name: str, 
expected_hash: str) -> None:
 def test_object_location_provider_write_data_path() -> None:
     provider = load_location_provider(
         table_location="s3://table-location/table",
-        table_properties={TableProperties.WRITE_DATA_PATH: 
"s3://table-location/custom/data/path"},
+        table_properties={
+            "write.object-storage.enabled": "true",
+            TableProperties.WRITE_DATA_PATH: 
"s3://table-location/custom/data/path",
+        },
     )
 
     assert (

Reply via email to