This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 70342ac8 Allow setting non-string typed values in `set_properties` (#504)
70342ac8 is described below
commit 70342ac83d2d1f121f3ab04c6d7317c8830fdad1
Author: Kevin Liu <[email protected]>
AuthorDate: Fri Mar 8 23:34:42 2024 -0800
Allow setting non-string typed values in `set_properties` (#504)
* pass dict properties to set_properties
* whitespace
* set non-string property values
* test error for none
* add comment
* rewrite validator
* properties validator
---
pyiceberg/catalog/rest.py | 7 +++++--
pyiceberg/table/__init__.py | 9 +++++++--
pyiceberg/table/metadata.py | 4 +++-
tests/integration/test_reads.py | 17 +++++++++++++++++
4 files changed, 32 insertions(+), 5 deletions(-)
diff --git a/pyiceberg/catalog/rest.py b/pyiceberg/catalog/rest.py
index c401339e..a5f33f02 100644
--- a/pyiceberg/catalog/rest.py
+++ b/pyiceberg/catalog/rest.py
@@ -149,9 +149,12 @@ class CreateTableRequest(IcebergBaseModel):
partition_spec: Optional[PartitionSpec] = Field(alias="partition-spec")
write_order: Optional[SortOrder] = Field(alias="write-order")
stage_create: bool = Field(alias="stage-create", default=False)
- properties: Properties = Field(default_factory=dict)
+ properties: Dict[str, str] = Field(default_factory=dict)
+
# validators
- transform_properties_dict_value_to_str = field_validator('properties', mode='before')(transform_dict_value_to_str)
+ @field_validator('properties', mode='before')
+ def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]:
+ return transform_dict_value_to_str(properties)
class RegisterTableRequest(IcebergBaseModel):
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 09cb814a..76aa533d 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -42,7 +42,7 @@ from typing import (
Union,
)
-from pydantic import Field, SerializeAsAny
+from pydantic import Field, SerializeAsAny, field_validator
from sortedcontainers import SortedList
from typing_extensions import Annotated
@@ -124,6 +124,7 @@ from pyiceberg.types import (
NestedField,
PrimitiveType,
StructType,
+ transform_dict_value_to_str,
)
from pyiceberg.utils.concurrent import ExecutorFactory
from pyiceberg.utils.datetime import datetime_to_millis
@@ -293,7 +294,7 @@ class Transaction:
return self
- def set_properties(self, properties: Properties = EMPTY_DICT, **kwargs: str) -> Transaction:
+ def set_properties(self, properties: Properties = EMPTY_DICT, **kwargs: Any) -> Transaction:
"""Set properties.
When a property is already set, it will be overwritten.
@@ -474,6 +475,10 @@ class SetPropertiesUpdate(TableUpdate):
action: TableUpdateAction = TableUpdateAction.set_properties
updates: Dict[str, str]
+ @field_validator('updates', mode='before')
+ def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]:
+ return transform_dict_value_to_str(properties)
+
class RemovePropertiesUpdate(TableUpdate):
action: TableUpdateAction = TableUpdateAction.remove_properties
diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py
index 1e5f0fdc..323f6d85 100644
--- a/pyiceberg/table/metadata.py
+++ b/pyiceberg/table/metadata.py
@@ -221,7 +221,9 @@ class TableMetadataCommonFields(IcebergBaseModel):
current-snapshot-id even if the refs map is null."""
# validators
- transform_properties_dict_value_to_str = field_validator('properties', mode='before')(transform_dict_value_to_str)
+ @field_validator('properties', mode='before')
+ def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]:
+ return transform_dict_value_to_str(properties)
def snapshot_by_id(self, snapshot_id: int) -> Optional[Snapshot]:
"""Get the snapshot by snapshot_id."""
diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py
index da43e782..fdc13ae7 100644
--- a/tests/integration/test_reads.py
+++ b/tests/integration/test_reads.py
@@ -24,6 +24,7 @@ import pyarrow.parquet as pq
import pytest
from hive_metastore.ttypes import LockRequest, LockResponse, LockState, UnlockRequest
from pyarrow.fs import S3FileSystem
+from pydantic_core import ValidationError
from pyiceberg.catalog import Catalog, load_catalog
from pyiceberg.catalog.hive import HiveCatalog, _HiveClient
@@ -119,6 +120,14 @@ def test_table_properties(catalog: Catalog) -> None:
table = table.transaction().remove_properties("abc").commit_transaction()
assert table.properties == DEFAULT_PROPERTIES
+ table = table.transaction().set_properties(abc=123).commit_transaction()
+ # properties are stored as strings in the iceberg spec
+ assert table.properties == dict(abc="123", **DEFAULT_PROPERTIES)
+
+ with pytest.raises(ValidationError) as exc_info:
+ table.transaction().set_properties(property_name=None).commit_transaction()
+ assert "None type is not a supported value in properties: property_name" in str(exc_info.value)
+
@pytest.mark.integration
@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')])
@@ -141,6 +150,14 @@ def test_table_properties_dict(catalog: Catalog) -> None:
table = table.transaction().remove_properties("abc").commit_transaction()
assert table.properties == DEFAULT_PROPERTIES
+ table = table.transaction().set_properties({"abc": 123}).commit_transaction()
+ # properties are stored as strings in the iceberg spec
+ assert table.properties == dict({"abc": "123"}, **DEFAULT_PROPERTIES)
+
+ with pytest.raises(ValidationError) as exc_info:
+ table.transaction().set_properties({"property_name": None}).commit_transaction()
+ assert "None type is not a supported value in properties: property_name" in str(exc_info.value)
+
@pytest.mark.integration
@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')])