This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 9610b414 fix: Set Expression serialization to use 'values' (#2782)
9610b414 is described below
commit 9610b41470b8728802cec487e30ef8294169dee5
Author: Drew Gallardo <[email protected]>
AuthorDate: Tue Nov 25 14:35:49 2025 -0800
fix: Set Expression serialization to use 'values' (#2782)
Related to #2775
# Rationale for this change
Update the Set Expression serialization to align with the REST spec.
Today, the set expression literals are serialized under the `items` key
instead of the `values` key that the spec requires.
For instance:
```json
{
"term": "foo",
"type": "not-in",
"items": [
1,
2,
3
]
}
```
When it should be:
```json
{
"term": "foo",
"type": "not-in",
"values": [
1,
2,
3
]
}
```
**REST ref**:
https://github.com/apache/iceberg/blob/47d5f5009eafbbb526e6e2c9cbeac3105bf34670/open-api/rest-catalog-open-api.yaml#L2353
**Expression ref**:
https://github.com/apache/iceberg/blob/d19e3ff07653167d902865281601a5da4e2f2def/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java#L189-L192
## Are these changes tested?
Yes
## Are there any user-facing changes?
Only the serialized output changes (the literals are now emitted under `values` instead of `items`); initialization stays the same.
---
pyiceberg/expressions/__init__.py | 4 ++--
tests/expressions/test_expressions.py | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/pyiceberg/expressions/__init__.py
b/pyiceberg/expressions/__init__.py
index 4871da3e..a928b988 100644
--- a/pyiceberg/expressions/__init__.py
+++ b/pyiceberg/expressions/__init__.py
@@ -596,11 +596,11 @@ class SetPredicate(IcebergBaseModel, UnboundPredicate,
ABC):
model_config = ConfigDict(arbitrary_types_allowed=True)
type: TypingLiteral["in", "not-in"] = Field(default="in")
- literals: set[LiteralValue] = Field(alias="items")
+ literals: set[LiteralValue] = Field(alias="values")
def __init__(self, term: str | UnboundTerm, literals: Iterable[Any] |
Iterable[LiteralValue]):
literal_set = _to_literal_set(literals)
- super().__init__(term=_to_unbound_term(term), items=literal_set) #
type: ignore
+ super().__init__(term=_to_unbound_term(term), values=literal_set) #
type: ignore
object.__setattr__(self, "literals", literal_set)
def bind(self, schema: Schema, case_sensitive: bool = True) ->
BoundSetPredicate:
diff --git a/tests/expressions/test_expressions.py
b/tests/expressions/test_expressions.py
index 47c26c8e..f0d6cdbc 100644
--- a/tests/expressions/test_expressions.py
+++ b/tests/expressions/test_expressions.py
@@ -915,12 +915,12 @@ def test_not_in() -> None:
def test_serialize_in() -> None:
pred = In(term="foo", literals=[1, 2, 3])
- assert pred.model_dump_json() ==
'{"term":"foo","type":"in","items":[1,2,3]}'
+ assert pred.model_dump_json() ==
'{"term":"foo","type":"in","values":[1,2,3]}'
def test_serialize_not_in() -> None:
pred = NotIn(term="foo", literals=[1, 2, 3])
- assert pred.model_dump_json() ==
'{"term":"foo","type":"not-in","items":[1,2,3]}'
+ assert pred.model_dump_json() ==
'{"term":"foo","type":"not-in","values":[1,2,3]}'
def test_bound_equal_to(term: BoundReference) -> None: