This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new c36c7bad3c Python: Add pickle support (#7645)
c36c7bad3c is described below
commit c36c7bad3ce60c6ca225760f332342fbc75b6528
Author: Georg Grob <[email protected]>
AuthorDate: Wed Jun 14 15:01:00 2023 +0100
Python: Add pickle support (#7645)
* Python: Add pickle support
* Fix test
* Fix type annotation
* Update python/pyiceberg/types.py
* Update python/pyiceberg/types.py
* Update python/pyiceberg/types.py
---------
Co-authored-by: Georg Grob <[email protected]>
Co-authored-by: Fokko Driesprong <[email protected]>
---
python/pyiceberg/expressions/__init__.py | 19 ++++++++++++++
python/pyiceberg/types.py | 18 +++++++++++++
python/tests/expressions/test_expressions.py | 38 +++++++++++++++++++++++++---
python/tests/test_types.py | 8 ++++++
4 files changed, 79 insertions(+), 4 deletions(-)
diff --git a/python/pyiceberg/expressions/__init__.py b/python/pyiceberg/expressions/__init__.py
index 8a7d35cc18..8737b22b2e 100644
--- a/python/pyiceberg/expressions/__init__.py
+++ b/python/pyiceberg/expressions/__init__.py
@@ -24,6 +24,7 @@ from typing import (
Generic,
Iterable,
Set,
+ Tuple,
Type,
TypeVar,
Union,
@@ -220,6 +221,9 @@ class And(BooleanExpression):
# De Morgan's law: not (A and B) = (not A) or (not B)
return Or(~self.left, ~self.right)
+ def __getnewargs__(self) -> Tuple[BooleanExpression, BooleanExpression]:
+ return (self.left, self.right)
+
class Or(BooleanExpression):
"""OR operation expression - logical disjunction."""
@@ -252,6 +256,9 @@ class Or(BooleanExpression):
# De Morgan's law: not (A or B) = (not A) and (not B)
return And(~self.left, ~self.right)
+ def __getnewargs__(self) -> Tuple[BooleanExpression, BooleanExpression]:
+ return (self.left, self.right)
+
class Not(BooleanExpression):
"""NOT operation expression - logical negation."""
@@ -278,6 +285,9 @@ class Not(BooleanExpression):
def __invert__(self) -> BooleanExpression:
return self.child
+ def __getnewargs__(self) -> Tuple[BooleanExpression]:
+ return (self.child,)
+
class AlwaysTrue(BooleanExpression, Singleton):
"""TRUE expression."""
@@ -364,6 +374,9 @@ class BoundUnaryPredicate(BoundPredicate[L], ABC):
def as_unbound(self) -> Type[UnaryPredicate]:
...
+ def __getnewargs__(self) -> Tuple[BoundTerm[L]]:
+ return (self.term,)
+
class BoundIsNull(BoundUnaryPredicate[L]):
def __new__(cls, term: BoundTerm[L]) -> BooleanExpression: # type: ignore
# pylint: disable=W0221
@@ -481,6 +494,9 @@ class SetPredicate(UnboundPredicate[L], ABC):
def __eq__(self, other: Any) -> bool:
return self.term == other.term and self.literals == other.literals if
isinstance(other, SetPredicate) else False
+ def __getnewargs__(self) -> Tuple[UnboundTerm[L], Set[Literal[L]]]:
+ return (self.term, self.literals)
+
@property
@abstractmethod
def as_bound(self) -> Type[BoundSetPredicate[L]]:
@@ -510,6 +526,9 @@ class BoundSetPredicate(BoundPredicate[L], ABC):
def __eq__(self, other: Any) -> bool:
return self.term == other.term and self.literals == other.literals if
isinstance(other, BoundSetPredicate) else False
+ def __getnewargs__(self) -> Tuple[BoundTerm[L], Set[Literal[L]]]:
+ return (self.term, self.literals)
+
@property
@abstractmethod
def as_unbound(self) -> Type[SetPredicate[L]]:
diff --git a/python/pyiceberg/types.py b/python/pyiceberg/types.py
index 582aa6f749..f826ebcaf9 100644
--- a/python/pyiceberg/types.py
+++ b/python/pyiceberg/types.py
@@ -146,6 +146,9 @@ class FixedType(PrimitiveType):
def __repr__(self) -> str:
return f"FixedType(length={self._len})"
+ def __getnewargs__(self) -> Tuple[int]:
+ return (self._len,)
+
class DecimalType(PrimitiveType):
"""A fixed data type in Iceberg.
@@ -190,6 +193,9 @@ class DecimalType(PrimitiveType):
def __repr__(self) -> str:
return f"DecimalType(precision={self._precision}, scale={self._scale})"
+ def __getnewargs__(self) -> Tuple[int, int]:
+ return (self._precision, self._scale)
+
class NestedField(IcebergType):
"""Represents a field of a struct, a map key, a map value, or a list
element.
@@ -246,6 +252,9 @@ class NestedField(IcebergType):
req = "required" if self.required else "optional"
return f"{self.field_id}: {self.name}: {req} {self.field_type}{doc}"
+ def __getnewargs__(self) -> Tuple[int, str, IcebergType, bool, Optional[str]]:
+ return (self.field_id, self.name, self.field_type, self.required, self.doc)
+
@property
def optional(self) -> bool:
return not self.required
@@ -286,6 +295,9 @@ class StructType(IcebergType):
def __len__(self) -> int:
return len(self.fields)
+ def __getnewargs__(self) -> Tuple[NestedField, ...]:
+ return self.fields
+
class ListType(IcebergType):
"""A list type in Iceberg.
@@ -321,6 +333,9 @@ class ListType(IcebergType):
def __str__(self) -> str:
return f"list<{self.element_type}>"
+ def __getnewargs__(self) -> Tuple[int, IcebergType, bool]:
+ return (self.element_id, self.element_type, self.element_required)
+
class MapType(IcebergType):
"""A map type in Iceberg.
@@ -366,6 +381,9 @@ class MapType(IcebergType):
def __str__(self) -> str:
return f"map<{self.key_type}, {self.value_type}>"
+ def __getnewargs__(self) -> Tuple[int, IcebergType, int, IcebergType, bool]:
+ return (self.key_id, self.key_type, self.value_id, self.value_type, self.value_required)
+
class BooleanType(PrimitiveType):
"""A boolean data type in Iceberg can be represented using an instance of
this class.
diff --git a/python/tests/expressions/test_expressions.py b/python/tests/expressions/test_expressions.py
index 5b82c14744..bd3a14165e 100644
--- a/python/tests/expressions/test_expressions.py
+++ b/python/tests/expressions/test_expressions.py
@@ -16,6 +16,7 @@
# under the License.
# pylint:disable=redefined-outer-name,eval-used
+import pickle
import uuid
from decimal import Decimal
from typing import Any
@@ -679,6 +680,7 @@ def test_bound_reference(field: NestedField, accessor:
Accessor) -> None:
assert str(bound_ref) == f"BoundReference(field={repr(field)},
accessor={repr(accessor)})"
assert repr(bound_ref) == f"BoundReference(field={repr(field)},
accessor={repr(accessor)})"
assert bound_ref == eval(repr(bound_ref))
+ assert bound_ref == pickle.loads(pickle.dumps(bound_ref))
def test_reference() -> None:
@@ -687,6 +689,7 @@ def test_reference() -> None:
assert str(ref) == "Reference(name='abc')"
assert repr(ref) == "Reference(name='abc')"
assert ref == eval(repr(ref))
+ assert ref == pickle.loads(pickle.dumps(ref))
def test_and() -> None:
@@ -696,6 +699,7 @@ def test_and() -> None:
assert str(and_) == f"And(left={str(null)}, right={str(nan)})"
assert repr(and_) == f"And(left={repr(null)}, right={repr(nan)})"
assert and_ == eval(repr(and_))
+ assert and_ == pickle.loads(pickle.dumps(and_))
def test_or() -> None:
@@ -705,14 +709,16 @@ def test_or() -> None:
assert str(or_) == f"Or(left={str(null)}, right={str(nan)})"
assert repr(or_) == f"Or(left={repr(null)}, right={repr(nan)})"
assert or_ == eval(repr(or_))
+ assert or_ == pickle.loads(pickle.dumps(or_))
def test_not() -> None:
null = IsNull(Reference("a"))
- or_ = Not(null)
- assert str(or_) == f"Not(child={str(null)})"
- assert repr(or_) == f"Not(child={repr(null)})"
- assert or_ == eval(repr(or_))
+ not_ = Not(null)
+ assert str(not_) == f"Not(child={str(null)})"
+ assert repr(not_) == f"Not(child={repr(null)})"
+ assert not_ == eval(repr(not_))
+ assert not_ == pickle.loads(pickle.dumps(not_))
def test_always_true() -> None:
@@ -720,6 +726,7 @@ def test_always_true() -> None:
assert str(always_true) == "AlwaysTrue()"
assert repr(always_true) == "AlwaysTrue()"
assert always_true == eval(repr(always_true))
+ assert always_true == pickle.loads(pickle.dumps(always_true))
def test_always_false() -> None:
@@ -727,6 +734,7 @@ def test_always_false() -> None:
assert str(always_false) == "AlwaysFalse()"
assert repr(always_false) == "AlwaysFalse()"
assert always_false == eval(repr(always_false))
+ assert always_false == pickle.loads(pickle.dumps(always_false))
def test_bound_reference_field_property() -> None:
@@ -756,6 +764,7 @@ def test_is_null() -> None:
assert str(is_null) == f"IsNull(term={str(ref)})"
assert repr(is_null) == f"IsNull(term={repr(ref)})"
assert is_null == eval(repr(is_null))
+ assert is_null == pickle.loads(pickle.dumps(is_null))
def test_not_null() -> None:
@@ -764,6 +773,7 @@ def test_not_null() -> None:
assert str(non_null) == f"NotNull(term={str(ref)})"
assert repr(non_null) == f"NotNull(term={repr(ref)})"
assert non_null == eval(repr(non_null))
+ assert non_null == pickle.loads(pickle.dumps(non_null))
def test_bound_is_nan(accessor: Accessor) -> None:
@@ -776,6 +786,7 @@ def test_bound_is_nan(accessor: Accessor) -> None:
assert str(bound_is_nan) == f"BoundIsNaN(term={str(term)})"
assert repr(bound_is_nan) == f"BoundIsNaN(term={repr(term)})"
assert bound_is_nan == eval(repr(bound_is_nan))
+ assert bound_is_nan == pickle.loads(pickle.dumps(bound_is_nan))
def test_bound_is_not_nan(accessor: Accessor) -> None:
@@ -788,6 +799,7 @@ def test_bound_is_not_nan(accessor: Accessor) -> None:
assert str(bound_not_nan) == f"BoundNotNaN(term={str(term)})"
assert repr(bound_not_nan) == f"BoundNotNaN(term={repr(term)})"
assert bound_not_nan == eval(repr(bound_not_nan))
+ assert bound_not_nan == pickle.loads(pickle.dumps(bound_not_nan))
def test_is_nan() -> None:
@@ -796,6 +808,7 @@ def test_is_nan() -> None:
assert str(is_nan) == f"IsNaN(term={str(ref)})"
assert repr(is_nan) == f"IsNaN(term={repr(ref)})"
assert is_nan == eval(repr(is_nan))
+ assert is_nan == pickle.loads(pickle.dumps(is_nan))
def test_not_nan() -> None:
@@ -804,6 +817,7 @@ def test_not_nan() -> None:
assert str(not_nan) == f"NotNaN(term={str(ref)})"
assert repr(not_nan) == f"NotNaN(term={repr(ref)})"
assert not_nan == eval(repr(not_nan))
+ assert not_nan == pickle.loads(pickle.dumps(not_nan))
def test_bound_in(term: BoundReference[Any]) -> None:
@@ -811,6 +825,7 @@ def test_bound_in(term: BoundReference[Any]) -> None:
assert str(bound_in) == f"BoundIn({str(term)}, {{a, b, c}})"
assert repr(bound_in) == f"BoundIn({repr(term)}, {{literal('a'),
literal('b'), literal('c')}})"
assert bound_in == eval(repr(bound_in))
+ assert bound_in == pickle.loads(pickle.dumps(bound_in))
def test_bound_not_in(term: BoundReference[Any]) -> None:
@@ -818,6 +833,7 @@ def test_bound_not_in(term: BoundReference[Any]) -> None:
assert str(bound_not_in) == f"BoundNotIn({str(term)}, {{a, b, c}})"
assert repr(bound_not_in) == f"BoundNotIn({repr(term)}, {{literal('a'),
literal('b'), literal('c')}})"
assert bound_not_in == eval(repr(bound_not_in))
+ assert bound_not_in == pickle.loads(pickle.dumps(bound_not_in))
def test_in() -> None:
@@ -826,6 +842,7 @@ def test_in() -> None:
assert str(unbound_in) == f"In({str(ref)}, {{a, b, c}})"
assert repr(unbound_in) == f"In({repr(ref)}, {{literal('a'), literal('b'),
literal('c')}})"
assert unbound_in == eval(repr(unbound_in))
+ assert unbound_in == pickle.loads(pickle.dumps(unbound_in))
def test_not_in() -> None:
@@ -834,6 +851,7 @@ def test_not_in() -> None:
assert str(not_in) == f"NotIn({str(ref)}, {{a, b, c}})"
assert repr(not_in) == f"NotIn({repr(ref)}, {{literal('a'), literal('b'),
literal('c')}})"
assert not_in == eval(repr(not_in))
+ assert not_in == pickle.loads(pickle.dumps(not_in))
def test_bound_equal_to(term: BoundReference[Any]) -> None:
@@ -841,6 +859,7 @@ def test_bound_equal_to(term: BoundReference[Any]) -> None:
assert str(bound_equal_to) == f"BoundEqualTo(term={str(term)},
literal=literal('a'))"
assert repr(bound_equal_to) == f"BoundEqualTo(term={repr(term)},
literal=literal('a'))"
assert bound_equal_to == eval(repr(bound_equal_to))
+ assert bound_equal_to == pickle.loads(pickle.dumps(bound_equal_to))
def test_bound_not_equal_to(term: BoundReference[Any]) -> None:
@@ -848,6 +867,7 @@ def test_bound_not_equal_to(term: BoundReference[Any]) ->
None:
assert str(bound_not_equal_to) == f"BoundNotEqualTo(term={str(term)},
literal=literal('a'))"
assert repr(bound_not_equal_to) == f"BoundNotEqualTo(term={repr(term)},
literal=literal('a'))"
assert bound_not_equal_to == eval(repr(bound_not_equal_to))
+ assert bound_not_equal_to == pickle.loads(pickle.dumps(bound_not_equal_to))
def test_bound_greater_than_or_equal_to(term: BoundReference[Any]) -> None:
@@ -855,6 +875,7 @@ def test_bound_greater_than_or_equal_to(term:
BoundReference[Any]) -> None:
assert str(bound_greater_than_or_equal_to) ==
f"BoundGreaterThanOrEqual(term={str(term)}, literal=literal('a'))"
assert repr(bound_greater_than_or_equal_to) ==
f"BoundGreaterThanOrEqual(term={repr(term)}, literal=literal('a'))"
assert bound_greater_than_or_equal_to ==
eval(repr(bound_greater_than_or_equal_to))
+ assert bound_greater_than_or_equal_to == pickle.loads(pickle.dumps(bound_greater_than_or_equal_to))
def test_bound_greater_than(term: BoundReference[Any]) -> None:
@@ -862,6 +883,7 @@ def test_bound_greater_than(term: BoundReference[Any]) ->
None:
assert str(bound_greater_than) == f"BoundGreaterThan(term={str(term)},
literal=literal('a'))"
assert repr(bound_greater_than) == f"BoundGreaterThan(term={repr(term)},
literal=literal('a'))"
assert bound_greater_than == eval(repr(bound_greater_than))
+ assert bound_greater_than == pickle.loads(pickle.dumps(bound_greater_than))
def test_bound_less_than(term: BoundReference[Any]) -> None:
@@ -869,6 +891,7 @@ def test_bound_less_than(term: BoundReference[Any]) -> None:
assert str(bound_less_than) == f"BoundLessThan(term={str(term)},
literal=literal('a'))"
assert repr(bound_less_than) == f"BoundLessThan(term={repr(term)},
literal=literal('a'))"
assert bound_less_than == eval(repr(bound_less_than))
+ assert bound_less_than == pickle.loads(pickle.dumps(bound_less_than))
def test_bound_less_than_or_equal(term: BoundReference[Any]) -> None:
@@ -876,6 +899,7 @@ def test_bound_less_than_or_equal(term:
BoundReference[Any]) -> None:
assert str(bound_less_than_or_equal) ==
f"BoundLessThanOrEqual(term={str(term)}, literal=literal('a'))"
assert repr(bound_less_than_or_equal) ==
f"BoundLessThanOrEqual(term={repr(term)}, literal=literal('a'))"
assert bound_less_than_or_equal == eval(repr(bound_less_than_or_equal))
+ assert bound_less_than_or_equal == pickle.loads(pickle.dumps(bound_less_than_or_equal))
def test_equal_to() -> None:
@@ -883,6 +907,7 @@ def test_equal_to() -> None:
assert str(equal_to) == "EqualTo(term=Reference(name='a'),
literal=literal('a'))"
assert repr(equal_to) == "EqualTo(term=Reference(name='a'),
literal=literal('a'))"
assert equal_to == eval(repr(equal_to))
+ assert equal_to == pickle.loads(pickle.dumps(equal_to))
def test_not_equal_to() -> None:
@@ -890,6 +915,7 @@ def test_not_equal_to() -> None:
assert str(not_equal_to) == "NotEqualTo(term=Reference(name='a'),
literal=literal('a'))"
assert repr(not_equal_to) == "NotEqualTo(term=Reference(name='a'),
literal=literal('a'))"
assert not_equal_to == eval(repr(not_equal_to))
+ assert not_equal_to == pickle.loads(pickle.dumps(not_equal_to))
def test_greater_than_or_equal_to() -> None:
@@ -897,6 +923,7 @@ def test_greater_than_or_equal_to() -> None:
assert str(greater_than_or_equal_to) ==
"GreaterThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
assert repr(greater_than_or_equal_to) ==
"GreaterThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
assert greater_than_or_equal_to == eval(repr(greater_than_or_equal_to))
+ assert greater_than_or_equal_to == pickle.loads(pickle.dumps(greater_than_or_equal_to))
def test_greater_than() -> None:
@@ -904,6 +931,7 @@ def test_greater_than() -> None:
assert str(greater_than) == "GreaterThan(term=Reference(name='a'),
literal=literal('a'))"
assert repr(greater_than) == "GreaterThan(term=Reference(name='a'),
literal=literal('a'))"
assert greater_than == eval(repr(greater_than))
+ assert greater_than == pickle.loads(pickle.dumps(greater_than))
def test_less_than() -> None:
@@ -911,6 +939,7 @@ def test_less_than() -> None:
assert str(less_than) == "LessThan(term=Reference(name='a'),
literal=literal('a'))"
assert repr(less_than) == "LessThan(term=Reference(name='a'),
literal=literal('a'))"
assert less_than == eval(repr(less_than))
+ assert less_than == pickle.loads(pickle.dumps(less_than))
def test_less_than_or_equal() -> None:
@@ -918,6 +947,7 @@ def test_less_than_or_equal() -> None:
assert str(less_than_or_equal) ==
"LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
assert repr(less_than_or_equal) ==
"LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
assert less_than_or_equal == eval(repr(less_than_or_equal))
+ assert less_than_or_equal == pickle.loads(pickle.dumps(less_than_or_equal))
def test_bound_reference_eval(table_schema_simple: Schema) -> None:
diff --git a/python/tests/test_types.py b/python/tests/test_types.py
index e2c0272b45..dbe54688c7 100644
--- a/python/tests/test_types.py
+++ b/python/tests/test_types.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
# pylint: disable=W0123,W0613
+import pickle
from typing import Type
import pytest
@@ -63,6 +64,7 @@ non_parameterized_types = [
@pytest.mark.parametrize("input_index, input_type", non_parameterized_types)
def test_repr_primitive_types(input_index: int, input_type:
Type[PrimitiveType]) -> None:
assert isinstance(eval(repr(input_type())), input_type)
+ assert input_type == pickle.loads(pickle.dumps(input_type))
@pytest.mark.parametrize(
@@ -109,6 +111,7 @@ def test_fixed_type() -> None:
assert str(type_var) == str(eval(repr(type_var)))
assert type_var == FixedType(5)
assert type_var != FixedType(6)
+ assert type_var == pickle.loads(pickle.dumps(type_var))
def test_decimal_type() -> None:
@@ -120,6 +123,7 @@ def test_decimal_type() -> None:
assert str(type_var) == str(eval(repr(type_var)))
assert type_var == DecimalType(9, 2)
assert type_var != DecimalType(9, 3)
+ assert type_var == pickle.loads(pickle.dumps(type_var))
def test_struct_type() -> None:
@@ -140,6 +144,7 @@ def test_struct_type() -> None:
assert str(type_var) == str(eval(repr(type_var)))
assert type_var == eval(repr(type_var))
assert type_var != StructType(NestedField(1, "optional_field",
IntegerType(), required=True))
+ assert type_var == pickle.loads(pickle.dumps(type_var))
def test_list_type() -> None:
@@ -163,6 +168,7 @@ def test_list_type() -> None:
),
True,
)
+ assert type_var == pickle.loads(pickle.dumps(type_var))
def test_map_type() -> None:
@@ -175,6 +181,7 @@ def test_map_type() -> None:
assert type_var == eval(repr(type_var))
assert type_var != MapType(1, LongType(), 2, UUIDType(), False)
assert type_var != MapType(1, DoubleType(), 2, StringType(), True)
+ assert type_var == pickle.loads(pickle.dumps(type_var))
def test_nested_field() -> None:
@@ -200,6 +207,7 @@ def test_nested_field() -> None:
assert field_var.field_id == 1
assert isinstance(field_var.field_type, StructType)
assert str(field_var) == str(eval(repr(field_var)))
+ assert field_var == pickle.loads(pickle.dumps(field_var))
@pytest.mark.parametrize("input_index,input_type", non_parameterized_types)