This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 3d087769 Set field-id when needed (#1867)
3d087769 is described below
commit 3d087769cb1758d5ed23237ade05dabae0d976be
Author: Fokko Driesprong <[email protected]>
AuthorDate: Tue Apr 1 17:24:57 2025 +0200
Set field-id when needed (#1867)
Fixes #1798
<!--
Thanks for opening a pull request!
-->
<!-- In the case this PR will resolve an issue, please replace
${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
<!-- Closes #${GITHUB_ISSUE_ID} -->
# Rationale for this change
# Are these changes tested?
# Are there any user-facing changes?
<!-- In the case of user-facing changes, please add the changelog label.
-->
---
pyiceberg/io/pyarrow.py | 2 +-
tests/integration/test_writes/test_writes.py | 36 ++++++++++++++++++++++++++++
2 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 5c70636e..a1fa696f 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1777,7 +1777,7 @@ class ArrowProjectionVisitor(SchemaWithPartnerVisitor[pa.Array, Optional[pa.Arra
field_arrays.append(array)
fields.append(self._construct_field(field, array.type))
elif field.optional:
- arrow_type = schema_to_pyarrow(field.field_type, include_field_ids=False)
+ arrow_type = schema_to_pyarrow(field.field_type, include_field_ids=self._include_field_ids)
field_arrays.append(pa.nulls(len(struct_array), type=arrow_type))
fields.append(self._construct_field(field, arrow_type))
else:
diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py
index 59c795cf..8575b588 100644
--- a/tests/integration/test_writes/test_writes.py
+++ b/tests/integration/test_writes/test_writes.py
@@ -52,6 +52,7 @@ from pyiceberg.types import (
DateType,
DoubleType,
IntegerType,
+ ListType,
LongType,
NestedField,
StringType,
@@ -1647,3 +1648,38 @@ def test_abort_table_transaction_on_exception(
# Validate the transaction is aborted and no partial update is applied
assert len(tbl.scan().to_pandas()) == table_size # type: ignore
+
+
+@pytest.mark.integration
+def test_write_optional_list(session_catalog: Catalog) -> None:
+ identifier = "default.test_write_optional_list"
+ schema = Schema(
+ NestedField(field_id=1, name="name", field_type=StringType(), required=False),
+ NestedField(
+ field_id=3,
+ name="my_list",
+ field_type=ListType(element_id=45, element=StringType(), element_required=False),
+ required=False,
+ ),
+ )
+ session_catalog.create_table_if_not_exists(identifier, schema)
+
+ df_1 = pa.Table.from_pylist(
+ [
+ {"name": "one", "my_list": ["test"]},
+ {"name": "another", "my_list": ["test"]},
+ ]
+ )
+ session_catalog.load_table(identifier).append(df_1)
+
+ assert len(session_catalog.load_table(identifier).scan().to_arrow()) == 2
+
+ df_2 = pa.Table.from_pylist(
+ [
+ {"name": "one"},
+ {"name": "another"},
+ ]
+ )
+ session_catalog.load_table(identifier).append(df_2)
+
+ assert len(session_catalog.load_table(identifier).scan().to_arrow()) == 4