This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 3855f64b Core: Improve error for null/unknown schema types in table creation (#2843)
3855f64b is described below

commit 3855f64b2ef5552483c377abeed95d2b9872777b
Author: Soham <[email protected]>
AuthorDate: Thu Jan 8 22:20:41 2026 +0530

    Core: Improve error for null/unknown schema types in table creation (#2843)
    
    What changed
    - Add a client-side validation error when a PyArrow `null` type is used
    with format version < 3, including the field path and a hint to use a
    concrete type or format-version 3.
    - Add a unit test to verify the clear error message when converting a
    PyArrow schema with a null field.
    
    Why
    - Prevents misleading REST errors like “Cannot parse type string…
    unknown” and points directly to the offending field.
    
    Testing
    - make lint
    - uv run pytest tests/catalog/test_base.py -k rejects_null_type -v
    - make test (timed out at ~42%)
    
    Closes #2539
    
    ---------
    
    Co-authored-by: Soham <[email protected]>
---
 pyiceberg/io/pyarrow.py    |  7 +++++++
 tests/catalog/test_base.py | 10 ++++++++++
 2 files changed, 17 insertions(+)

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index efeb72cb..55ecc7ac 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1435,6 +1435,13 @@ class _ConvertToIceberg(PyArrowSchemaVisitor[IcebergType | Schema]):
         elif pa.types.is_null(primitive):
             # PyArrow null type (pa.null()) is converted to Iceberg UnknownType
             # UnknownType can be promoted to any primitive type in V3+ tables per the Iceberg spec
+            if self._format_version < 3:
+                field_path = ".".join(self._field_names) if self._field_names else "<root>"
+                raise ValueError(
+                    "Null type (pa.null()) is not supported in Iceberg format version "
+                    f"{self._format_version}. Field: {field_path}. "
+                    "Requires format-version=3+ or use a concrete type (string, int, boolean, etc.)."
+                )
             return UnknownType()
         elif isinstance(primitive, pa.UuidType):
             return UUIDType()
diff --git a/tests/catalog/test_base.py b/tests/catalog/test_base.py
index 96e04c19..d91bfcdb 100644
--- a/tests/catalog/test_base.py
+++ b/tests/catalog/test_base.py
@@ -210,6 +210,16 @@ def test_convert_schema_if_needed(
     assert expected == catalog._convert_schema_if_needed(schema)
 
 
+def test_convert_schema_if_needed_rejects_null_type(catalog: InMemoryCatalog) -> None:
+    schema = pa.schema([pa.field("n1", pa.null())])
+    with pytest.raises(ValueError) as exc_info:
+        catalog._convert_schema_if_needed(schema)
+    message = str(exc_info.value)
+    assert "Null type" in message
+    assert "n1" in message
+    assert "format-version=3" in message
+
+
 def test_create_table_pyarrow_schema(catalog: InMemoryCatalog, pyarrow_schema_simple_without_ids: pa.Schema) -> None:
     catalog.create_namespace(TEST_TABLE_NAMESPACE)
     table = catalog.create_table(

Reply via email to