(iceberg-python) branch main updated: feat: make LiteralPredicate serializable via internal IcebergBaseModel (#2561)

fokko Sun, 19 Oct 2025 11:52:26 -0700

This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git



The following commit(s) were added to refs/heads/main by this push:
     new 617e258c feat: make LiteralPredicate serializable via internal IcebergBaseModel (#2561)
617e258c is described below

commit 617e258c2d29648230dccb3e6d4260fc7d22e4ff
Author: Jaime Fernández <[email protected]>
AuthorDate: Sun Oct 19 20:50:54 2025 +0200

    feat: make LiteralPredicate serializable via internal IcebergBaseModel (#2561)
    
    <!--
    Thanks for opening a pull request!
    -->
    
    <!-- In the case this PR will resolve an issue, please replace
    ${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
    Closes [#2523](https://github.com/apache/iceberg-python/issues/2523)
    
    # Rationale for this change
    
    ### Spec alignment
    `LiteralPredicate.type` uses the same enum as the REST OpenAPI
    `LiteralExpression.type`:
    `"lt" | "lt-eq" | "gt" | "gt-eq" | "eq" | "not-eq" | "starts-with" |
    "not-starts-with"`.
    Source: OpenAPI spec (LiteralExpression).
    
    Ref:
    https://github.com/apache/iceberg/blob/b987e60bbd581d6e9e583107d5a85022261ff0d8/open-api/rest-catalog-open-api.yaml#L2264
    
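    ## Are these changes tested?
    Yes. `tests/expressions/test_expressions.py` now asserts the `model_dump_json()` output of every literal predicate (`eq`, `not-eq`, `lt`, `lt-eq`, `gt`, `gt-eq`, `starts-with`, `not-starts-with`).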
    
    ## Are there any user-facing changes?
    
    <!-- In the case of user-facing changes, please add the changelog label.
    -->
---
 pyiceberg/expressions/__init__.py     | 36 +++++++++++++++++++++++----
 pyiceberg/transforms.py               |  2 +-
 tests/expressions/test_evaluator.py   | 46 +++++++++++++++++++----------------
 tests/expressions/test_expressions.py | 31 +++++++++++++++++++++--
 4 files changed, 86 insertions(+), 29 deletions(-)

diff --git a/pyiceberg/expressions/__init__.py 
b/pyiceberg/expressions/__init__.py
index d0824cc3..2d733383 100644
--- a/pyiceberg/expressions/__init__.py
+++ b/pyiceberg/expressions/__init__.py
@@ -743,12 +743,18 @@ class NotIn(SetPredicate[L], ABC):
         return BoundNotIn[L]
 
 
-class LiteralPredicate(UnboundPredicate[L], ABC):
-    literal: Literal[L]
+class LiteralPredicate(IcebergBaseModel, UnboundPredicate[L], ABC):
+    type: TypingLiteral["lt", "lt-eq", "gt", "gt-eq", "eq", "not-eq", 
"starts-with", "not-starts-with"] = Field(alias="type")
+    term: UnboundTerm[Any]
+    value: Literal[L] = Field()
+    model_config = ConfigDict(populate_by_name=True, frozen=True, 
arbitrary_types_allowed=True)
 
-    def __init__(self, term: Union[str, UnboundTerm[Any]], literal: Union[L, 
Literal[L]]):  # pylint: disable=W0621
-        super().__init__(term)
-        self.literal = _to_literal(literal)  # pylint: disable=W0621
+    def __init__(self, term: Union[str, UnboundTerm[Any]], literal: Union[L, 
Literal[L]]):
+        super().__init__(term=_to_unbound_term(term), 
value=_to_literal(literal))  # type: ignore[call-arg]
+
+    @property
+    def literal(self) -> Literal[L]:
+        return self.value
 
     def bind(self, schema: Schema, case_sensitive: bool = True) -> 
BoundLiteralPredicate[L]:
         bound_term = self.term.bind(schema, case_sensitive)
@@ -773,6 +779,10 @@ class LiteralPredicate(UnboundPredicate[L], ABC):
             return self.term == other.term and self.literal == other.literal
         return False
 
+    def __str__(self) -> str:
+        """Return the string representation of the LiteralPredicate class."""
+        return f"{str(self.__class__.__name__)}(term={repr(self.term)}, 
literal={repr(self.literal)})"
+
     def __repr__(self) -> str:
         """Return the string representation of the LiteralPredicate class."""
         return f"{str(self.__class__.__name__)}(term={repr(self.term)}, 
literal={repr(self.literal)})"
@@ -886,6 +896,8 @@ class BoundNotStartsWith(BoundLiteralPredicate[L]):
 
 
 class EqualTo(LiteralPredicate[L]):
+    type: TypingLiteral["eq"] = Field(default="eq", alias="type")
+
     def __invert__(self) -> NotEqualTo[L]:
         """Transform the Expression into its negated version."""
         return NotEqualTo[L](self.term, self.literal)
@@ -896,6 +908,8 @@ class EqualTo(LiteralPredicate[L]):
 
 
 class NotEqualTo(LiteralPredicate[L]):
+    type: TypingLiteral["not-eq"] = Field(default="not-eq", alias="type")
+
     def __invert__(self) -> EqualTo[L]:
         """Transform the Expression into its negated version."""
         return EqualTo[L](self.term, self.literal)
@@ -906,6 +920,8 @@ class NotEqualTo(LiteralPredicate[L]):
 
 
 class LessThan(LiteralPredicate[L]):
+    type: TypingLiteral["lt"] = Field(default="lt", alias="type")
+
     def __invert__(self) -> GreaterThanOrEqual[L]:
         """Transform the Expression into its negated version."""
         return GreaterThanOrEqual[L](self.term, self.literal)
@@ -916,6 +932,8 @@ class LessThan(LiteralPredicate[L]):
 
 
 class GreaterThanOrEqual(LiteralPredicate[L]):
+    type: TypingLiteral["gt-eq"] = Field(default="gt-eq", alias="type")
+
     def __invert__(self) -> LessThan[L]:
         """Transform the Expression into its negated version."""
         return LessThan[L](self.term, self.literal)
@@ -926,6 +944,8 @@ class GreaterThanOrEqual(LiteralPredicate[L]):
 
 
 class GreaterThan(LiteralPredicate[L]):
+    type: TypingLiteral["gt"] = Field(default="gt", alias="type")
+
     def __invert__(self) -> LessThanOrEqual[L]:
         """Transform the Expression into its negated version."""
         return LessThanOrEqual[L](self.term, self.literal)
@@ -936,6 +956,8 @@ class GreaterThan(LiteralPredicate[L]):
 
 
 class LessThanOrEqual(LiteralPredicate[L]):
+    type: TypingLiteral["lt-eq"] = Field(default="lt-eq", alias="type")
+
     def __invert__(self) -> GreaterThan[L]:
         """Transform the Expression into its negated version."""
         return GreaterThan[L](self.term, self.literal)
@@ -946,6 +968,8 @@ class LessThanOrEqual(LiteralPredicate[L]):
 
 
 class StartsWith(LiteralPredicate[L]):
+    type: TypingLiteral["starts-with"] = Field(default="starts-with", 
alias="type")
+
     def __invert__(self) -> NotStartsWith[L]:
         """Transform the Expression into its negated version."""
         return NotStartsWith[L](self.term, self.literal)
@@ -956,6 +980,8 @@ class StartsWith(LiteralPredicate[L]):
 
 
 class NotStartsWith(LiteralPredicate[L]):
+    type: TypingLiteral["not-starts-with"] = Field(default="not-starts-with", 
alias="type")
+
     def __invert__(self) -> StartsWith[L]:
         """Transform the Expression into its negated version."""
         return StartsWith[L](self.term, self.literal)
diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py
index 30b39293..4069a953 100644
--- a/pyiceberg/transforms.py
+++ b/pyiceberg/transforms.py
@@ -120,7 +120,7 @@ def _try_import(module_name: str, extras_name: 
Optional[str] = None) -> types.Mo
         raise NotInstalledError(msg) from None
 
 
-def _transform_literal(func: Callable[[L], L], lit: Literal[L]) -> Literal[L]:
+def _transform_literal(func: Callable[[Any], Any], lit: Literal[L]) -> 
Literal[L]:
     """Small helper to upwrap the value from the literal, and wrap it again."""
     return literal(func(lit.value))
 
diff --git a/tests/expressions/test_evaluator.py 
b/tests/expressions/test_evaluator.py
index cfc32d9b..07888dd4 100644
--- a/tests/expressions/test_evaluator.py
+++ b/tests/expressions/test_evaluator.py
@@ -22,6 +22,7 @@ import pytest
 from pyiceberg.conversions import to_bytes
 from pyiceberg.expressions import (
     And,
+    BooleanExpression,
     EqualTo,
     GreaterThan,
     GreaterThanOrEqual,
@@ -30,6 +31,7 @@ from pyiceberg.expressions import (
     IsNull,
     LessThan,
     LessThanOrEqual,
+    LiteralPredicate,
     Not,
     NotEqualTo,
     NotIn,
@@ -301,7 +303,7 @@ def test_missing_stats() -> None:
         upper_bounds=None,
     )
 
-    expressions = [
+    expressions: list[BooleanExpression] = [
         LessThan("no_stats", 5),
         LessThanOrEqual("no_stats", 30),
         EqualTo("no_stats", 70),
@@ -324,7 +326,7 @@ def test_zero_record_file_stats(schema_data_file: Schema) 
-> None:
         file_path="file_1.parquet", file_format=FileFormat.PARQUET, 
partition=Record(), record_count=0
     )
 
-    expressions = [
+    expressions: list[BooleanExpression] = [
         LessThan("no_stats", 5),
         LessThanOrEqual("no_stats", 30),
         EqualTo("no_stats", 70),
@@ -683,26 +685,27 @@ def data_file_nan() -> DataFile:
 
 
 def 
test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_file_nan:
 Schema, data_file_nan: DataFile) -> None:
-    for operator in [LessThan, LessThanOrEqual]:  # type: ignore
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("all_nan", 1)).eval(data_file_nan)  # type: ignore[arg-type]
+    operators: tuple[type[LiteralPredicate[Any]], ...] = (LessThan, 
LessThanOrEqual)
+    for operator in operators:
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("all_nan", 1)).eval(data_file_nan)
         assert not should_read, "Should not match: all nan column doesn't 
contain number"
 
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("max_nan", 1)).eval(data_file_nan)  # type: ignore[arg-type]
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("max_nan", 1)).eval(data_file_nan)
         assert not should_read, "Should not match: 1 is smaller than lower 
bound"
 
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("max_nan", 10)).eval(data_file_nan)  # type: ignore[arg-type]
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("max_nan", 10)).eval(data_file_nan)
         assert should_read, "Should match: 10 is larger than lower bound"
 
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("min_max_nan", 1)).eval(data_file_nan)  # type: ignore[arg-type]
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("min_max_nan", 1)).eval(data_file_nan)
         assert should_read, "Should match: no visibility"
 
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("all_nan_null_bounds", 1)).eval(data_file_nan)  # type: 
ignore[arg-type]
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("all_nan_null_bounds", 1)).eval(data_file_nan)
         assert not should_read, "Should not match: all nan column doesn't 
contain number"
 
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("some_nan_correct_bounds", 1)).eval(data_file_nan)  # type: 
ignore[arg-type]
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("some_nan_correct_bounds", 1)).eval(data_file_nan)
         assert not should_read, "Should not match: 1 is smaller than lower 
bound"
 
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("some_nan_correct_bounds", 10)).eval(  # type: ignore[arg-type]
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("some_nan_correct_bounds", 10)).eval(
             data_file_nan
         )
         assert should_read, "Should match: 10 larger than lower bound"
@@ -711,31 +714,32 @@ def 
test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_f
 def test_inclusive_metrics_evaluator_greater_than_and_greater_than_equal(
     schema_data_file_nan: Schema, data_file_nan: DataFile
 ) -> None:
-    for operator in [GreaterThan, GreaterThanOrEqual]:  # type: ignore
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("all_nan", 1)).eval(data_file_nan)  # type: ignore[arg-type]
+    operators: tuple[type[LiteralPredicate[Any]], ...] = (GreaterThan, 
GreaterThanOrEqual)
+    for operator in operators:
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("all_nan", 1)).eval(data_file_nan)
         assert not should_read, "Should not match: all nan column doesn't 
contain number"
 
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("max_nan", 1)).eval(data_file_nan)  # type: ignore[arg-type]
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("max_nan", 1)).eval(data_file_nan)
         assert should_read, "Should match: upper bound is larger than 1"
 
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("max_nan", 10)).eval(data_file_nan)  # type: ignore[arg-type]
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("max_nan", 10)).eval(data_file_nan)
         assert should_read, "Should match: upper bound is larger than 10"
 
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("min_max_nan", 1)).eval(data_file_nan)  # type: ignore[arg-type]
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("min_max_nan", 1)).eval(data_file_nan)
         assert should_read, "Should match: no visibility"
 
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("all_nan_null_bounds", 1)).eval(data_file_nan)  # type: 
ignore[arg-type]
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("all_nan_null_bounds", 1)).eval(data_file_nan)
         assert not should_read, "Should not match: all nan column doesn't 
contain number"
 
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("some_nan_correct_bounds", 1)).eval(data_file_nan)  # type: 
ignore[arg-type]
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("some_nan_correct_bounds", 1)).eval(data_file_nan)
         assert should_read, "Should match: 1 is smaller than upper bound"
 
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("some_nan_correct_bounds", 10)).eval(  # type: ignore[arg-type]
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("some_nan_correct_bounds", 10)).eval(
             data_file_nan
         )
         assert should_read, "Should match: 10 is smaller than upper bound"
 
-        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("all_nan", 30)).eval(data_file_nan)  # type: ignore[arg-type]
+        should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, 
operator("all_nan", 30)).eval(data_file_nan)
         assert not should_read, "Should not match: 30 is greater than upper 
bound"
 
 
@@ -1162,7 +1166,7 @@ def test_strict_missing_stats(strict_data_file_schema: 
Schema, strict_data_file_
         upper_bounds=None,
     )
 
-    expressions = [
+    expressions: list[BooleanExpression] = [
         LessThan("no_stats", 5),
         LessThanOrEqual("no_stats", 30),
         EqualTo("no_stats", 70),
@@ -1185,7 +1189,7 @@ def 
test_strict_zero_record_file_stats(strict_data_file_schema: Schema) -> None:
         file_path="file_1.parquet", file_format=FileFormat.PARQUET, 
partition=Record(), record_count=0
     )
 
-    expressions = [
+    expressions: list[BooleanExpression] = [
         LessThan("no_stats", 5),
         LessThanOrEqual("no_stats", 30),
         EqualTo("no_stats", 70),
diff --git a/tests/expressions/test_expressions.py 
b/tests/expressions/test_expressions.py
index 115cc402..63673fda 100644
--- a/tests/expressions/test_expressions.py
+++ b/tests/expressions/test_expressions.py
@@ -50,19 +50,22 @@ from pyiceberg.expressions import (
     IsNull,
     LessThan,
     LessThanOrEqual,
+    LiteralPredicate,
     Not,
     NotEqualTo,
     NotIn,
     NotNaN,
     NotNull,
+    NotStartsWith,
     Or,
     Reference,
+    StartsWith,
     UnboundPredicate,
 )
 from pyiceberg.expressions.literals import Literal, literal
 from pyiceberg.expressions.visitors import _from_byte_buffer
 from pyiceberg.schema import Accessor, Schema
-from pyiceberg.typedef import Record
+from pyiceberg.typedef import L, Record
 from pyiceberg.types import (
     DecimalType,
     DoubleType,
@@ -915,6 +918,7 @@ def test_bound_less_than_or_equal(term: 
BoundReference[Any]) -> None:
 
 def test_equal_to() -> None:
     equal_to = EqualTo(Reference("a"), literal("a"))
+    assert equal_to.model_dump_json() == '{"term":"a","type":"eq","value":"a"}'
     assert str(equal_to) == "EqualTo(term=Reference(name='a'), 
literal=literal('a'))"
     assert repr(equal_to) == "EqualTo(term=Reference(name='a'), 
literal=literal('a'))"
     assert equal_to == eval(repr(equal_to))
@@ -923,6 +927,7 @@ def test_equal_to() -> None:
 
 def test_not_equal_to() -> None:
     not_equal_to = NotEqualTo(Reference("a"), literal("a"))
+    assert not_equal_to.model_dump_json() == 
'{"term":"a","type":"not-eq","value":"a"}'
     assert str(not_equal_to) == "NotEqualTo(term=Reference(name='a'), 
literal=literal('a'))"
     assert repr(not_equal_to) == "NotEqualTo(term=Reference(name='a'), 
literal=literal('a'))"
     assert not_equal_to == eval(repr(not_equal_to))
@@ -931,6 +936,7 @@ def test_not_equal_to() -> None:
 
 def test_greater_than_or_equal_to() -> None:
     greater_than_or_equal_to = GreaterThanOrEqual(Reference("a"), literal("a"))
+    assert greater_than_or_equal_to.model_dump_json() == 
'{"term":"a","type":"gt-eq","value":"a"}'
     assert str(greater_than_or_equal_to) == 
"GreaterThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
     assert repr(greater_than_or_equal_to) == 
"GreaterThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
     assert greater_than_or_equal_to == eval(repr(greater_than_or_equal_to))
@@ -939,6 +945,7 @@ def test_greater_than_or_equal_to() -> None:
 
 def test_greater_than() -> None:
     greater_than = GreaterThan(Reference("a"), literal("a"))
+    assert greater_than.model_dump_json() == 
'{"term":"a","type":"gt","value":"a"}'
     assert str(greater_than) == "GreaterThan(term=Reference(name='a'), 
literal=literal('a'))"
     assert repr(greater_than) == "GreaterThan(term=Reference(name='a'), 
literal=literal('a'))"
     assert greater_than == eval(repr(greater_than))
@@ -947,6 +954,7 @@ def test_greater_than() -> None:
 
 def test_less_than() -> None:
     less_than = LessThan(Reference("a"), literal("a"))
+    assert less_than.model_dump_json() == 
'{"term":"a","type":"lt","value":"a"}'
     assert str(less_than) == "LessThan(term=Reference(name='a'), 
literal=literal('a'))"
     assert repr(less_than) == "LessThan(term=Reference(name='a'), 
literal=literal('a'))"
     assert less_than == eval(repr(less_than))
@@ -955,12 +963,23 @@ def test_less_than() -> None:
 
 def test_less_than_or_equal() -> None:
     less_than_or_equal = LessThanOrEqual(Reference("a"), literal("a"))
+    assert less_than_or_equal.model_dump_json() == 
'{"term":"a","type":"lt-eq","value":"a"}'
     assert str(less_than_or_equal) == 
"LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
     assert repr(less_than_or_equal) == 
"LessThanOrEqual(term=Reference(name='a'), literal=literal('a'))"
     assert less_than_or_equal == eval(repr(less_than_or_equal))
     assert less_than_or_equal == pickle.loads(pickle.dumps(less_than_or_equal))
 
 
+def test_starts_with() -> None:
+    starts_with = StartsWith(Reference("a"), literal("a"))
+    assert starts_with.model_dump_json() == 
'{"term":"a","type":"starts-with","value":"a"}'
+
+
+def test_not_starts_with() -> None:
+    not_starts_with = NotStartsWith(Reference("a"), literal("a"))
+    assert not_starts_with.model_dump_json() == 
'{"term":"a","type":"not-starts-with","value":"a"}'
+
+
 def test_bound_reference_eval(table_schema_simple: Schema) -> None:
     """Test creating a BoundReference and evaluating it on a StructProtocol"""
     struct = Record("foovalue", 123, True)
@@ -1199,7 +1218,15 @@ def test_bind_ambiguous_name() -> None:
 #  |_|  |_|\_, |_|  \_, |
 #          |__/     |__/
 
-assert_type(EqualTo("a", "b"), EqualTo[str])
+
+def _assert_literal_predicate_type(expr: LiteralPredicate[L]) -> None:
+    assert_type(expr, LiteralPredicate[L])
+
+
+_assert_literal_predicate_type(EqualTo("a", "b"))
+_assert_literal_predicate_type(In("a", ("a", "b", "c")))
+_assert_literal_predicate_type(In("a", (1, 2, 3)))
+_assert_literal_predicate_type(NotIn("a", ("a", "b", "c")))
 assert_type(In("a", ("a", "b", "c")), In[str])
 assert_type(In("a", (1, 2, 3)), In[int])
 assert_type(NotIn("a", ("a", "b", "c")), NotIn[str])

(iceberg-python) branch main updated: feat: make LiteralPredicate serializable via internal IcebergBaseModel (#2561)

Reply via email to