This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new c36c7bad3c Python: Add pickle support (#7645)
c36c7bad3c is described below

commit c36c7bad3ce60c6ca225760f332342fbc75b6528
Author: Georg Grob <[email protected]>
AuthorDate: Wed Jun 14 15:01:00 2023 +0100

    Python: Add pickle support (#7645)
    
    * Python: Add pickle support
    
    * Fix test
    
    * Fix type annotation
    
    * Update python/pyiceberg/types.py
    
    * Update python/pyiceberg/types.py
    
    * Update python/pyiceberg/types.py
    
    ---------
    
    Co-authored-by: Georg Grob <[email protected]>
    Co-authored-by: Fokko Driesprong <[email protected]>
---
 python/pyiceberg/expressions/__init__.py     | 19 ++++++++++++++
 python/pyiceberg/types.py                    | 18 +++++++++++++
 python/tests/expressions/test_expressions.py | 38 +++++++++++++++++++++++++---
 python/tests/test_types.py                   |  8 ++++++
 4 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/python/pyiceberg/expressions/__init__.py b/python/pyiceberg/expressions/__init__.py
index 8a7d35cc18..8737b22b2e 100644
--- a/python/pyiceberg/expressions/__init__.py
+++ b/python/pyiceberg/expressions/__init__.py
@@ -24,6 +24,7 @@ from typing import (
     Generic,
     Iterable,
     Set,
+    Tuple,
     Type,
     TypeVar,
     Union,
@@ -220,6 +221,9 @@ class And(BooleanExpression):
         # De Morgan's law: not (A and B) = (not A) or (not B)
         return Or(~self.left, ~self.right)
 
+    def __getnewargs__(self) -> Tuple[BooleanExpression, BooleanExpression]:
+        return (self.left, self.right)
+
 
 class Or(BooleanExpression):
     """OR operation expression - logical disjunction."""
@@ -252,6 +256,9 @@ class Or(BooleanExpression):
         # De Morgan's law: not (A or B) = (not A) and (not B)
         return And(~self.left, ~self.right)
 
+    def __getnewargs__(self) -> Tuple[BooleanExpression, BooleanExpression]:
+        return (self.left, self.right)
+
 
 class Not(BooleanExpression):
     """NOT operation expression - logical negation."""
@@ -278,6 +285,9 @@ class Not(BooleanExpression):
     def __invert__(self) -> BooleanExpression:
         return self.child
 
+    def __getnewargs__(self) -> Tuple[BooleanExpression]:
+        return (self.child,)
+
 
 class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression."""
@@ -364,6 +374,9 @@ class BoundUnaryPredicate(BoundPredicate[L], ABC):
     def as_unbound(self) -> Type[UnaryPredicate]:
         ...
 
+    def __getnewargs__(self) -> Tuple[BoundTerm[L]]:
+        return (self.term,)
+
 
 class BoundIsNull(BoundUnaryPredicate[L]):
     def __new__(cls, term: BoundTerm[L]) -> BooleanExpression:  # type: ignore  # pylint: disable=W0221
@@ -481,6 +494,9 @@ class SetPredicate(UnboundPredicate[L], ABC):
     def __eq__(self, other: Any) -> bool:
         return self.term == other.term and self.literals == other.literals if isinstance(other, SetPredicate) else False
 
+    def __getnewargs__(self) -> Tuple[UnboundTerm[L], Set[Literal[L]]]:
+        return (self.term, self.literals)
+
     @property
     @abstractmethod
     def as_bound(self) -> Type[BoundSetPredicate[L]]:
@@ -510,6 +526,9 @@ class BoundSetPredicate(BoundPredicate[L], ABC):
     def __eq__(self, other: Any) -> bool:
         return self.term == other.term and self.literals == other.literals if isinstance(other, BoundSetPredicate) else False
 
+    def __getnewargs__(self) -> Tuple[BoundTerm[L], Set[Literal[L]]]:
+        return (self.term, self.literals)
+
     @property
     @abstractmethod
     def as_unbound(self) -> Type[SetPredicate[L]]:
diff --git a/python/pyiceberg/types.py b/python/pyiceberg/types.py
index 582aa6f749..f826ebcaf9 100644
--- a/python/pyiceberg/types.py
+++ b/python/pyiceberg/types.py
@@ -146,6 +146,9 @@ class FixedType(PrimitiveType):
     def __repr__(self) -> str:
         return f"FixedType(length={self._len})"
 
+    def __getnewargs__(self) -> Tuple[int]:
+        return (self._len,)
+
 
 class DecimalType(PrimitiveType):
     """A fixed data type in Iceberg.
@@ -190,6 +193,9 @@ class DecimalType(PrimitiveType):
     def __repr__(self) -> str:
         return f"DecimalType(precision={self._precision}, scale={self._scale})"
 
+    def __getnewargs__(self) -> Tuple[int, int]:
+        return (self._precision, self._scale)
+
 
 class NestedField(IcebergType):
     """Represents a field of a struct, a map key, a map value, or a list element.
@@ -246,6 +252,9 @@ class NestedField(IcebergType):
         req = "required" if self.required else "optional"
         return f"{self.field_id}: {self.name}: {req} {self.field_type}{doc}"
 
+    def __getnewargs__(self) -> Tuple[int, str, IcebergType, bool, Optional[str]]:
+        return (self.field_id, self.name, self.field_type, self.required, self.doc)
+
     @property
     def optional(self) -> bool:
         return not self.required
@@ -286,6 +295,9 @@ class StructType(IcebergType):
     def __len__(self) -> int:
         return len(self.fields)
 
+    def __getnewargs__(self) -> Tuple[NestedField, ...]:
+        return self.fields
+
 
 class ListType(IcebergType):
     """A list type in Iceberg.
@@ -321,6 +333,9 @@ class ListType(IcebergType):
     def __str__(self) -> str:
         return f"list<{self.element_type}>"
 
+    def __getnewargs__(self) -> Tuple[int, IcebergType, bool]:
+        return (self.element_id, self.element_type, self.element_required)
+
 
 class MapType(IcebergType):
     """A map type in Iceberg.
@@ -366,6 +381,9 @@ class MapType(IcebergType):
     def __str__(self) -> str:
         return f"map<{self.key_type}, {self.value_type}>"
 
+    def __getnewargs__(self) -> Tuple[int, IcebergType, int, IcebergType, bool]:
+        return (self.key_id, self.key_type, self.value_id, self.value_type, self.value_required)
+
 
 class BooleanType(PrimitiveType):
     """A boolean data type in Iceberg can be represented using an instance of this class.
diff --git a/python/tests/expressions/test_expressions.py b/python/tests/expressions/test_expressions.py
index 5b82c14744..bd3a14165e 100644
--- a/python/tests/expressions/test_expressions.py
+++ b/python/tests/expressions/test_expressions.py
@@ -16,6 +16,7 @@
 # under the License.
 # pylint:disable=redefined-outer-name,eval-used
 
+import pickle
 import uuid
 from decimal import Decimal
 from typing import Any
@@ -679,6 +680,7 @@ def test_bound_reference(field: NestedField, accessor: Accessor) -> None:
     assert str(bound_ref) == f"BoundReference(field={repr(field)}, 
accessor={repr(accessor)})"
     assert repr(bound_ref) == f"BoundReference(field={repr(field)}, 
accessor={repr(accessor)})"
     assert bound_ref == eval(repr(bound_ref))
+    assert bound_ref == pickle.loads(pickle.dumps(bound_ref))
 
 
 def test_reference() -> None:
@@ -687,6 +689,7 @@ def test_reference() -> None:
     assert str(ref) == "Reference(name='abc')"
     assert repr(ref) == "Reference(name='abc')"
     assert ref == eval(repr(ref))
+    assert ref == pickle.loads(pickle.dumps(ref))
 
 
 def test_and() -> None:
@@ -696,6 +699,7 @@ def test_and() -> None:
     assert str(and_) == f"And(left={str(null)}, right={str(nan)})"
     assert repr(and_) == f"And(left={repr(null)}, right={repr(nan)})"
     assert and_ == eval(repr(and_))
+    assert and_ == pickle.loads(pickle.dumps(and_))
 
 
 def test_or() -> None:
@@ -705,14 +709,16 @@ def test_or() -> None:
     assert str(or_) == f"Or(left={str(null)}, right={str(nan)})"
     assert repr(or_) == f"Or(left={repr(null)}, right={repr(nan)})"
     assert or_ == eval(repr(or_))
+    assert or_ == pickle.loads(pickle.dumps(or_))
 
 
 def test_not() -> None:
     null = IsNull(Reference("a"))
-    or_ = Not(null)
-    assert str(or_) == f"Not(child={str(null)})"
-    assert repr(or_) == f"Not(child={repr(null)})"
-    assert or_ == eval(repr(or_))
+    not_ = Not(null)
+    assert str(not_) == f"Not(child={str(null)})"
+    assert repr(not_) == f"Not(child={repr(null)})"
+    assert not_ == eval(repr(not_))
+    assert not_ == pickle.loads(pickle.dumps(not_))
 
 
 def test_always_true() -> None:
@@ -720,6 +726,7 @@ def test_always_true() -> None:
     assert str(always_true) == "AlwaysTrue()"
     assert repr(always_true) == "AlwaysTrue()"
     assert always_true == eval(repr(always_true))
+    assert always_true == pickle.loads(pickle.dumps(always_true))
 
 
 def test_always_false() -> None:
@@ -727,6 +734,7 @@ def test_always_false() -> None:
     assert str(always_false) == "AlwaysFalse()"
     assert repr(always_false) == "AlwaysFalse()"
     assert always_false == eval(repr(always_false))
+    assert always_false == pickle.loads(pickle.dumps(always_false))
 
 
 def test_bound_reference_field_property() -> None:
@@ -756,6 +764,7 @@ def test_is_null() -> None:
     assert str(is_null) == f"IsNull(term={str(ref)})"
     assert repr(is_null) == f"IsNull(term={repr(ref)})"
     assert is_null == eval(repr(is_null))
+    assert is_null == pickle.loads(pickle.dumps(is_null))
 
 
 def test_not_null() -> None:
@@ -764,6 +773,7 @@ def test_not_null() -> None:
     assert str(non_null) == f"NotNull(term={str(ref)})"
     assert repr(non_null) == f"NotNull(term={repr(ref)})"
     assert non_null == eval(repr(non_null))
+    assert non_null == pickle.loads(pickle.dumps(non_null))
 
 
 def test_bound_is_nan(accessor: Accessor) -> None:
@@ -776,6 +786,7 @@ def test_bound_is_nan(accessor: Accessor) -> None:
     assert str(bound_is_nan) == f"BoundIsNaN(term={str(term)})"
     assert repr(bound_is_nan) == f"BoundIsNaN(term={repr(term)})"
     assert bound_is_nan == eval(repr(bound_is_nan))
+    assert bound_is_nan == pickle.loads(pickle.dumps(bound_is_nan))
 
 
 def test_bound_is_not_nan(accessor: Accessor) -> None:
@@ -788,6 +799,7 @@ def test_bound_is_not_nan(accessor: Accessor) -> None:
     assert str(bound_not_nan) == f"BoundNotNaN(term={str(term)})"
     assert repr(bound_not_nan) == f"BoundNotNaN(term={repr(term)})"
     assert bound_not_nan == eval(repr(bound_not_nan))
+    assert bound_not_nan == pickle.loads(pickle.dumps(bound_not_nan))
 
 
 def test_is_nan() -> None:
@@ -796,6 +808,7 @@ def test_is_nan() -> None:
     assert str(is_nan) == f"IsNaN(term={str(ref)})"
     assert repr(is_nan) == f"IsNaN(term={repr(ref)})"
     assert is_nan == eval(repr(is_nan))
+    assert is_nan == pickle.loads(pickle.dumps(is_nan))
 
 
 def test_not_nan() -> None:
@@ -804,6 +817,7 @@ def test_not_nan() -> None:
     assert str(not_nan) == f"NotNaN(term={str(ref)})"
     assert repr(not_nan) == f"NotNaN(term={repr(ref)})"
     assert not_nan == eval(repr(not_nan))
+    assert not_nan == pickle.loads(pickle.dumps(not_nan))
 
 
 def test_bound_in(term: BoundReference[Any]) -> None:
@@ -811,6 +825,7 @@ def test_bound_in(term: BoundReference[Any]) -> None:
     assert str(bound_in) == f"BoundIn({str(term)}, {{a, b, c}})"
     assert repr(bound_in) == f"BoundIn({repr(term)}, {{literal('a'), 
literal('b'), literal('c')}})"
     assert bound_in == eval(repr(bound_in))
+    assert bound_in == pickle.loads(pickle.dumps(bound_in))
 
 
 def test_bound_not_in(term: BoundReference[Any]) -> None:
@@ -818,6 +833,7 @@ def test_bound_not_in(term: BoundReference[Any]) -> None:
     assert str(bound_not_in) == f"BoundNotIn({str(term)}, {{a, b, c}})"
     assert repr(bound_not_in) == f"BoundNotIn({repr(term)}, {{literal('a'), 
literal('b'), literal('c')}})"
     assert bound_not_in == eval(repr(bound_not_in))
+    assert bound_not_in == pickle.loads(pickle.dumps(bound_not_in))
 
 
 def test_in() -> None:
@@ -826,6 +842,7 @@ def test_in() -> None:
     assert str(unbound_in) == f"In({str(ref)}, {{a, b, c}})"
     assert repr(unbound_in) == f"In({repr(ref)}, {{literal('a'), literal('b'), 
literal('c')}})"
     assert unbound_in == eval(repr(unbound_in))
+    assert unbound_in == pickle.loads(pickle.dumps(unbound_in))
 
 
 def test_not_in() -> None:
@@ -834,6 +851,7 @@ def test_not_in() -> None:
     assert str(not_in) == f"NotIn({str(ref)}, {{a, b, c}})"
     assert repr(not_in) == f"NotIn({repr(ref)}, {{literal('a'), literal('b'), 
literal('c')}})"
     assert not_in == eval(repr(not_in))
+    assert not_in == pickle.loads(pickle.dumps(not_in))
 
 
 def test_bound_equal_to(term: BoundReference[Any]) -> None:
@@ -841,6 +859,7 @@ def test_bound_equal_to(term: BoundReference[Any]) -> None:
     assert str(bound_equal_to) == f"BoundEqualTo(term={str(term)}, 
literal=literal('a'))"
     assert repr(bound_equal_to) == f"BoundEqualTo(term={repr(term)}, 
literal=literal('a'))"
     assert bound_equal_to == eval(repr(bound_equal_to))
+    assert bound_equal_to == pickle.loads(pickle.dumps(bound_equal_to))
 
 
 def test_bound_not_equal_to(term: BoundReference[Any]) -> None:
@@ -848,6 +867,7 @@ def test_bound_not_equal_to(term: BoundReference[Any]) -> 
None:
     assert str(bound_not_equal_to) == f"BoundNotEqualTo(term={str(term)}, 
literal=literal('a'))"
     assert repr(bound_not_equal_to) == f"BoundNotEqualTo(term={repr(term)}, 
literal=literal('a'))"
     assert bound_not_equal_to == eval(repr(bound_not_equal_to))
+    assert bound_not_equal_to == pickle.loads(pickle.dumps(bound_not_equal_to))
 
 
 def test_bound_greater_than_or_equal_to(term: BoundReference[Any]) -> None:
@@ -855,6 +875,7 @@ def test_bound_greater_than_or_equal_to(term: 
BoundReference[Any]) -> None:
     assert str(bound_greater_than_or_equal_to) == 
f"BoundGreaterThanOrEqual(term={str(term)}, literal=literal('a'))"
     assert repr(bound_greater_than_or_equal_to) == 
f"BoundGreaterThanOrEqual(term={repr(term)}, literal=literal('a'))"
     assert bound_greater_than_or_equal_to == 
eval(repr(bound_greater_than_or_equal_to))
+    assert bound_greater_than_or_equal_to == pickle.loads(pickle.dumps(bound_greater_than_or_equal_to))
 
 
 def test_bound_greater_than(term: BoundReference[Any]) -> None:
@@ -862,6 +883,7 @@ def test_bound_greater_than(term: BoundReference[Any]) -> 
None:
     assert str(bound_greater_than) == f"BoundGreaterThan(term={str(term)}, 
literal=literal('a'))"
     assert repr(bound_greater_than) == f"BoundGreaterThan(term={repr(term)}, 
literal=literal('a'))"
     assert bound_greater_than == eval(repr(bound_greater_than))
+    assert bound_greater_than == pickle.loads(pickle.dumps(bound_greater_than))
 
 
 def test_bound_less_than(term: BoundReference[Any]) -> None:
@@ -869,6 +891,7 @@ def test_bound_less_than(term: BoundReference[Any]) -> None:
     assert str(bound_less_than) == f"BoundLessThan(term={str(term)}, 
literal=literal('a'))"
     assert repr(bound_less_than) == f"BoundLessThan(term={repr(term)}, 
literal=literal('a'))"
     assert bound_less_than == eval(repr(bound_less_than))
+    assert bound_less_than == pickle.loads(pickle.dumps(bound_less_than))
 
 
 def test_bound_less_than_or_equal(term: BoundReference[Any]) -> None:
@@ -876,6 +899,7 @@ def test_bound_less_than_or_equal(term: 
BoundReference[Any]) -> None:
     assert str(bound_less_than_or_equal) == 
f"BoundLessThanOrEqual(term={str(term)}, literal=literal('a'))"
     assert repr(bound_less_than_or_equal) == 
f"BoundLessThanOrEqual(term={repr(term)}, literal=literal('a'))"
     assert bound_less_than_or_equal == eval(repr(bound_less_than_or_equal))
+    assert bound_less_than_or_equal == pickle.loads(pickle.dumps(bound_less_than_or_equal))
 
 
 def test_equal_to() -> None:
@@ -883,6 +907,7 @@ def test_equal_to() -> None:
     assert str(equal_to) == "EqualTo(term=Reference(name='a'), 
literal=literal('a'))"
     assert repr(equal_to) == "EqualTo(term=Reference(name='a'), 
literal=literal('a'))"
     assert equal_to == eval(repr(equal_to))
+    assert equal_to == pickle.loads(pickle.dumps(equal_to))
 
 
 def test_not_equal_to() -> None:
@@ -890,6 +915,7 @@ def test_not_equal_to() -> None:
     assert str(not_equal_to) == "NotEqualTo(term=Reference(name='a'), 
literal=literal('a'))"
     assert repr(not_equal_to) == "NotEqualTo(term=Reference(name='a'), 
literal=literal('a'))"
     assert not_equal_to == eval(repr(not_equal_to))
+    assert not_equal_to == pickle.loads(pickle.dumps(not_equal_to))
 
 
 def test_greater_than_or_equal_to() -> None:
@@ -897,6 +923,7 @@ def test_greater_than_or_equal_to() -> None:
     assert str(greater_than_or_equal_to) == 
"GreaterThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
     assert repr(greater_than_or_equal_to) == 
"GreaterThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
     assert greater_than_or_equal_to == eval(repr(greater_than_or_equal_to))
+    assert greater_than_or_equal_to == pickle.loads(pickle.dumps(greater_than_or_equal_to))
 
 
 def test_greater_than() -> None:
@@ -904,6 +931,7 @@ def test_greater_than() -> None:
     assert str(greater_than) == "GreaterThan(term=Reference(name='a'), 
literal=literal('a'))"
     assert repr(greater_than) == "GreaterThan(term=Reference(name='a'), 
literal=literal('a'))"
     assert greater_than == eval(repr(greater_than))
+    assert greater_than == pickle.loads(pickle.dumps(greater_than))
 
 
 def test_less_than() -> None:
@@ -911,6 +939,7 @@ def test_less_than() -> None:
     assert str(less_than) == "LessThan(term=Reference(name='a'), 
literal=literal('a'))"
     assert repr(less_than) == "LessThan(term=Reference(name='a'), 
literal=literal('a'))"
     assert less_than == eval(repr(less_than))
+    assert less_than == pickle.loads(pickle.dumps(less_than))
 
 
 def test_less_than_or_equal() -> None:
@@ -918,6 +947,7 @@ def test_less_than_or_equal() -> None:
     assert str(less_than_or_equal) == 
"LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
     assert repr(less_than_or_equal) == 
"LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
     assert less_than_or_equal == eval(repr(less_than_or_equal))
+    assert less_than_or_equal == pickle.loads(pickle.dumps(less_than_or_equal))
 
 
 def test_bound_reference_eval(table_schema_simple: Schema) -> None:
diff --git a/python/tests/test_types.py b/python/tests/test_types.py
index e2c0272b45..dbe54688c7 100644
--- a/python/tests/test_types.py
+++ b/python/tests/test_types.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 # pylint: disable=W0123,W0613
+import pickle
 from typing import Type
 
 import pytest
@@ -63,6 +64,7 @@ non_parameterized_types = [
 @pytest.mark.parametrize("input_index, input_type", non_parameterized_types)
 def test_repr_primitive_types(input_index: int, input_type: 
Type[PrimitiveType]) -> None:
     assert isinstance(eval(repr(input_type())), input_type)
+    assert input_type == pickle.loads(pickle.dumps(input_type))
 
 
 @pytest.mark.parametrize(
@@ -109,6 +111,7 @@ def test_fixed_type() -> None:
     assert str(type_var) == str(eval(repr(type_var)))
     assert type_var == FixedType(5)
     assert type_var != FixedType(6)
+    assert type_var == pickle.loads(pickle.dumps(type_var))
 
 
 def test_decimal_type() -> None:
@@ -120,6 +123,7 @@ def test_decimal_type() -> None:
     assert str(type_var) == str(eval(repr(type_var)))
     assert type_var == DecimalType(9, 2)
     assert type_var != DecimalType(9, 3)
+    assert type_var == pickle.loads(pickle.dumps(type_var))
 
 
 def test_struct_type() -> None:
@@ -140,6 +144,7 @@ def test_struct_type() -> None:
     assert str(type_var) == str(eval(repr(type_var)))
     assert type_var == eval(repr(type_var))
     assert type_var != StructType(NestedField(1, "optional_field", 
IntegerType(), required=True))
+    assert type_var == pickle.loads(pickle.dumps(type_var))
 
 
 def test_list_type() -> None:
@@ -163,6 +168,7 @@ def test_list_type() -> None:
         ),
         True,
     )
+    assert type_var == pickle.loads(pickle.dumps(type_var))
 
 
 def test_map_type() -> None:
@@ -175,6 +181,7 @@ def test_map_type() -> None:
     assert type_var == eval(repr(type_var))
     assert type_var != MapType(1, LongType(), 2, UUIDType(), False)
     assert type_var != MapType(1, DoubleType(), 2, StringType(), True)
+    assert type_var == pickle.loads(pickle.dumps(type_var))
 
 
 def test_nested_field() -> None:
@@ -200,6 +207,7 @@ def test_nested_field() -> None:
     assert field_var.field_id == 1
     assert isinstance(field_var.field_type, StructType)
     assert str(field_var) == str(eval(repr(field_var)))
+    assert field_var == pickle.loads(pickle.dumps(field_var))
 
 
 @pytest.mark.parametrize("input_index,input_type", non_parameterized_types)

Reply via email to