This is an automated email from the ASF dual-hosted git repository.
kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 3855f64b Core: Improve error for null/unknown schema types in table creation (#2843)
3855f64b is described below
commit 3855f64b2ef5552483c377abeed95d2b9872777b
Author: Soham <[email protected]>
AuthorDate: Thu Jan 8 22:20:41 2026 +0530
Core: Improve error for null/unknown schema types in table creation (#2843)
What changed
- Add a client-side validation error when a PyArrow `null` type is used
with format version < 3, including the field path and a hint to use a
concrete type or format-version 3.
- Add a unit test to verify the clear error message when converting a
PyArrow schema with a null field.
Why
- Prevents misleading REST errors like “Cannot parse type string…
unknown” and points directly to the offending field.
Testing
- make lint
- uv run pytest tests/catalog/test_base.py -k rejects_null_type -v
- make test (timed out at ~42%)
Closes #2539
---------
Co-authored-by: Soham <[email protected]>
---
pyiceberg/io/pyarrow.py | 7 +++++++
tests/catalog/test_base.py | 10 ++++++++++
2 files changed, 17 insertions(+)
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index efeb72cb..55ecc7ac 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1435,6 +1435,13 @@ class _ConvertToIceberg(PyArrowSchemaVisitor[IcebergType | Schema]):
elif pa.types.is_null(primitive):
# PyArrow null type (pa.null()) is converted to Iceberg UnknownType
# UnknownType can be promoted to any primitive type in V3+ tables per the Iceberg spec
+ if self._format_version < 3:
+ field_path = ".".join(self._field_names) if self._field_names else "<root>"
+ raise ValueError(
+ "Null type (pa.null()) is not supported in Iceberg format version "
+ f"{self._format_version}. Field: {field_path}. "
+ "Requires format-version=3+ or use a concrete type (string, int, boolean, etc.)."
+ )
return UnknownType()
elif isinstance(primitive, pa.UuidType):
return UUIDType()
diff --git a/tests/catalog/test_base.py b/tests/catalog/test_base.py
index 96e04c19..d91bfcdb 100644
--- a/tests/catalog/test_base.py
+++ b/tests/catalog/test_base.py
@@ -210,6 +210,16 @@ def test_convert_schema_if_needed(
assert expected == catalog._convert_schema_if_needed(schema)
+def test_convert_schema_if_needed_rejects_null_type(catalog: InMemoryCatalog) -> None:
+ schema = pa.schema([pa.field("n1", pa.null())])
+ with pytest.raises(ValueError) as exc_info:
+ catalog._convert_schema_if_needed(schema)
+ message = str(exc_info.value)
+ assert "Null type" in message
+ assert "n1" in message
+ assert "format-version=3" in message
+
+
def test_create_table_pyarrow_schema(catalog: InMemoryCatalog, pyarrow_schema_simple_without_ids: pa.Schema) -> None:
catalog.create_namespace(TEST_TABLE_NAMESPACE)
table = catalog.create_table(