This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new d7dcd99  Arrow: Support int8 and int16 types (#391)
d7dcd99 is described below

commit d7dcd99f7098d7a61f4d8da193ffd04cd7b63a47
Author: Fokko Driesprong <[email protected]>
AuthorDate: Thu Feb 8 08:46:29 2024 +0100

    Arrow: Support int8 and int16 types (#391)
    
    I've checked with Spark, and here byte and short types are converted
    to integers. I think it makes sense to do this for Arrow as well.
---
 pyiceberg/io/pyarrow.py          | 13 +++++++++----
 tests/io/test_pyarrow_visitor.py | 12 ++++++++++++
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 904fab2..1b14771 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -853,10 +853,15 @@ class _ConvertToIceberg(PyArrowSchemaVisitor[Union[IcebergType, Schema]]):
     def primitive(self, primitive: pa.DataType) -> PrimitiveType:
         if pa.types.is_boolean(primitive):
             return BooleanType()
-        elif pa.types.is_int32(primitive):
-            return IntegerType()
-        elif pa.types.is_int64(primitive):
-            return LongType()
+        elif pa.types.is_integer(primitive):
+            width = primitive.bit_width
+            if width <= 32:
+                return IntegerType()
+            elif width <= 64:
+                return LongType()
+            else:
+                # Does not exist (yet)
+                raise TypeError(f"Unsupported integer type: {primitive}")
         elif pa.types.is_float32(primitive):
             return FloatType()
         elif pa.types.is_float64(primitive):
diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py
index c30a53a..c6ba18c 100644
--- a/tests/io/test_pyarrow_visitor.py
+++ b/tests/io/test_pyarrow_visitor.py
@@ -84,6 +84,18 @@ def test_pyarrow_boolean_to_iceberg() -> None:
     assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type
 
 
+def test_pyarrow_int8_to_iceberg() -> None:
+    pyarrow_type = pa.int8()
+    converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
+    assert converted_iceberg_type == IntegerType()
+
+
+def test_pyarrow_int16_to_iceberg() -> None:
+    pyarrow_type = pa.int16()
+    converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
+    assert converted_iceberg_type == IntegerType()
+
+
 def test_pyarrow_int32_to_iceberg() -> None:
     pyarrow_type = pa.int32()
     converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())

Reply via email to