This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new ce28e004 Make Literal Pydantic serializeable (#2575)
ce28e004 is described below

commit ce28e0045a9949304d0a80fa0c518509a61ab158
Author: Fokko Driesprong <[email protected]>
AuthorDate: Tue Oct 7 03:45:01 2025 +0200

    Make Literal Pydantic serializeable (#2575)
    
    # Rationale for this change
    
    Resolves #2572
    
    ## Are these changes tested?
    
    ## Are there any user-facing changes?
    
    <!-- In the case of user-facing changes, please add the changelog label.
    -->
---
 pyiceberg/expressions/__init__.py   |  2 +-
 pyiceberg/expressions/literals.py   | 49 ++++++++++++++++++++++++++++---------
 tests/expressions/test_evaluator.py |  4 +--
 tests/expressions/test_literals.py  | 28 ++++++++++++++++-----
 4 files changed, 63 insertions(+), 20 deletions(-)

diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py
index c3b5ae74..a8c0fdf4 100644
--- a/pyiceberg/expressions/__init__.py
+++ b/pyiceberg/expressions/__init__.py
@@ -696,7 +696,7 @@ class In(SetPredicate[L]):
         if count == 0:
             return AlwaysFalse()
         elif count == 1:
-            return EqualTo(term, next(iter(literals)))  # type: ignore
+            return EqualTo(term, next(iter(literals)))
         else:
             return super().__new__(cls)
 
diff --git a/pyiceberg/expressions/literals.py b/pyiceberg/expressions/literals.py
index 921e24e2..0847f19c 100644
--- a/pyiceberg/expressions/literals.py
+++ b/pyiceberg/expressions/literals.py
@@ -30,7 +30,9 @@ from math import isnan
 from typing import Any, Generic, Type
 from uuid import UUID
 
-from pyiceberg.typedef import L
+from pydantic import Field, model_serializer
+
+from pyiceberg.typedef import IcebergRootModel, L
 from pyiceberg.types import (
     BinaryType,
     BooleanType,
@@ -52,7 +54,9 @@ from pyiceberg.utils.datetime import (
     date_str_to_days,
     date_to_days,
     datetime_to_micros,
+    days_to_date,
     micros_to_days,
+    micros_to_timestamp,
     time_str_to_micros,
     time_to_micros,
     timestamp_to_micros,
@@ -64,21 +68,24 @@ from pyiceberg.utils.singleton import Singleton
 UUID_BYTES_LENGTH = 16
 
 
-class Literal(Generic[L], ABC):
+class Literal(IcebergRootModel[L], Generic[L], ABC):  # type: ignore
     """Literal which has a value and can be converted between types."""
 
-    _value: L
+    root: L = Field()
+
+    def __init__(self, value: L, value_type: Type[L], /, **data):  # type: ignore
+        if value is None:
+            raise TypeError("Invalid literal value: None")
 
-    def __init__(self, value: L, value_type: Type[L]):
+        super().__init__(value)
         if value is None or not isinstance(value, value_type):
             raise TypeError(f"Invalid literal value: {value!r} (not a {value_type})")
         if isinstance(value, float) and isnan(value):
             raise ValueError("Cannot create expression literal from NaN.")
-        self._value = value
 
     @property
     def value(self) -> L:
-        return self._value
+        return self.root
 
     @singledispatchmethod
     @abstractmethod
@@ -136,7 +143,7 @@ def literal(value: L) -> Literal[L]:
         LongLiteral(123)
     """
     if isinstance(value, float):
-        return DoubleLiteral(value)  # type: ignore
+        return DoubleLiteral(value)
     elif isinstance(value, bool):
         return BooleanLiteral(value)
     elif isinstance(value, int):
@@ -144,17 +151,17 @@ def literal(value: L) -> Literal[L]:
     elif isinstance(value, str):
         return StringLiteral(value)
     elif isinstance(value, UUID):
-        return UUIDLiteral(value.bytes)  # type: ignore
+        return UUIDLiteral(value.bytes)
     elif isinstance(value, bytes):
         return BinaryLiteral(value)
     elif isinstance(value, Decimal):
         return DecimalLiteral(value)
     elif isinstance(value, datetime):
-        return TimestampLiteral(datetime_to_micros(value))  # type: ignore
+        return TimestampLiteral(datetime_to_micros(value))
     elif isinstance(value, date):
-        return DateLiteral(date_to_days(value))  # type: ignore
+        return DateLiteral(date_to_days(value))
     elif isinstance(value, time):
-        return TimeLiteral(time_to_micros(value))  # type: ignore
+        return TimeLiteral(time_to_micros(value))
     else:
         raise TypeError(f"Invalid literal value: {repr(value)}")
 
@@ -411,6 +418,10 @@ class DateLiteral(Literal[int]):
     def __init__(self, value: int) -> None:
         super().__init__(value, int)
 
+    @model_serializer
+    def ser_model(self) -> date:
+        return days_to_date(self.root)
+
     def increment(self) -> Literal[int]:
         return DateLiteral(self.value + 1)
 
@@ -443,6 +454,10 @@ class TimestampLiteral(Literal[int]):
     def __init__(self, value: int) -> None:
         super().__init__(value, int)
 
+    @model_serializer
+    def ser_model(self) -> str:
+        return micros_to_timestamp(self.root).isoformat()
+
     def increment(self) -> Literal[int]:
         return TimestampLiteral(self.value + 1)
 
@@ -635,6 +650,10 @@ class UUIDLiteral(Literal[bytes]):
     def __init__(self, value: bytes) -> None:
         super().__init__(value, bytes)
 
+    @model_serializer
+    def ser_model(self) -> UUID:
+        return UUID(bytes=self.root)
+
     @singledispatchmethod
     def to(self, type_var: IcebergType) -> Literal:  # type: ignore
         raise TypeError(f"Cannot convert UUIDLiteral into {type_var}")
@@ -661,6 +680,10 @@ class FixedLiteral(Literal[bytes]):
     def __init__(self, value: bytes) -> None:
         super().__init__(value, bytes)
 
+    @model_serializer
+    def ser_model(self) -> str:
+        return self.root.hex()
+
     @singledispatchmethod
     def to(self, type_var: IcebergType) -> Literal:  # type: ignore
         raise TypeError(f"Cannot convert FixedLiteral into {type_var}")
@@ -692,6 +715,10 @@ class BinaryLiteral(Literal[bytes]):
     def __init__(self, value: bytes) -> None:
         super().__init__(value, bytes)
 
+    @model_serializer
+    def ser_model(self) -> str:
+        return self.root.hex()
+
     @singledispatchmethod
     def to(self, type_var: IcebergType) -> Literal:  # type: ignore
         raise TypeError(f"Cannot convert BinaryLiteral into {type_var}")
diff --git a/tests/expressions/test_evaluator.py b/tests/expressions/test_evaluator.py
index 7b150991..cfc32d9b 100644
--- a/tests/expressions/test_evaluator.py
+++ b/tests/expressions/test_evaluator.py
@@ -683,7 +683,7 @@ def data_file_nan() -> DataFile:
 
 
 def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_file_nan: Schema, data_file_nan: DataFile) -> None:
-    for operator in [LessThan, LessThanOrEqual]:
+    for operator in [LessThan, LessThanOrEqual]:  # type: ignore
         should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan)  # type: ignore[arg-type]
         assert not should_read, "Should not match: all nan column doesn't contain number"
 
@@ -711,7 +711,7 @@ def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_f
 def test_inclusive_metrics_evaluator_greater_than_and_greater_than_equal(
     schema_data_file_nan: Schema, data_file_nan: DataFile
 ) -> None:
-    for operator in [GreaterThan, GreaterThanOrEqual]:
+    for operator in [GreaterThan, GreaterThanOrEqual]:  # type: ignore
         should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan)  # type: ignore[arg-type]
         assert not should_read, "Should not match: all nan column doesn't contain number"
 
diff --git a/tests/expressions/test_literals.py b/tests/expressions/test_literals.py
index 4d8f5557..2137681e 100644
--- a/tests/expressions/test_literals.py
+++ b/tests/expressions/test_literals.py
@@ -319,8 +319,8 @@ def test_string_to_time_literal() -> None:
 
     avro_val = 51661919000
 
-    assert isinstance(time_lit, TimeLiteral)  # type: ignore
-    assert avro_val == time_lit.value  # type: ignore
+    assert isinstance(time_lit, TimeLiteral)
+    assert avro_val == time_lit.value
 
 
 def test_string_to_timestamp_literal() -> None:
@@ -428,8 +428,8 @@ def test_python_date_conversion() -> None:
 
     from_str_lit = literal(one_day_str).to(DateType())
 
-    assert isinstance(from_str_lit, DateLiteral)  # type: ignore
-    assert from_str_lit.value == 19079  # type: ignore
+    assert isinstance(from_str_lit, DateLiteral)
+    assert from_str_lit.value == 19079
 
 
 @pytest.mark.parametrize(
@@ -911,7 +911,7 @@ def test_uuid_to_fixed() -> None:
     with pytest.raises(TypeError) as e:
         uuid_literal.to(FixedType(15))
     assert "Cannot convert UUIDLiteral into fixed[15], different length: 15 <> 16" in str(e.value)
-    assert isinstance(fixed_literal, FixedLiteral)  # type: ignore
+    assert isinstance(fixed_literal, FixedLiteral)
 
 
 def test_uuid_to_binary() -> None:
@@ -919,7 +919,7 @@ def test_uuid_to_binary() -> None:
     uuid_literal = literal(test_uuid)
     binary_literal = uuid_literal.to(BinaryType())
     assert test_uuid.bytes == binary_literal.value
-    assert isinstance(binary_literal, BinaryLiteral)  # type: ignore
+    assert isinstance(binary_literal, BinaryLiteral)
 
 
 def test_literal_from_datetime() -> None:
@@ -930,6 +930,22 @@ def test_literal_from_date() -> None:
     assert isinstance(literal(datetime.date.today()), DateLiteral)
 
 
+def test_to_json() -> None:
+    assert literal(True).model_dump_json() == "true"
+    assert literal(float(123)).model_dump_json() == "123.0"
+    assert literal(123).model_dump_json() == "123"
+    assert literal("vo").model_dump_json() == '"vo"'
+    assert (
+        literal(uuid.UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7")).model_dump_json() == '"f79c3e09-677c-4bbd-a479-3f349cb785e7"'
+    )
+    assert literal(bytes([0x01, 0x02, 0x03])).model_dump_json() == '"010203"'
+    assert literal(Decimal("19.25")).model_dump_json() == '"19.25"'
+    assert literal(datetime.date.fromisoformat("2022-03-28")).model_dump_json() == '"2022-03-28"'
+    assert (
+        literal(datetime.datetime.fromisoformat("1970-11-22T00:00:00.000000+00:00")).model_dump_json() == '"1970-11-22T00:00:00"'
+    )
+
+
 #   __  __      ___
 #  |  \/  |_  _| _ \_  _
 #  | |\/| | || |  _/ || |

Reply via email to