This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new d7dcd99 Arrow: Support int8 and int16 types (#391)
d7dcd99 is described below
commit d7dcd99f7098d7a61f4d8da193ffd04cd7b63a47
Author: Fokko Driesprong <[email protected]>
AuthorDate: Thu Feb 8 08:46:29 2024 +0100
Arrow: Support int8 and int16 types (#391)
I've checked with Spark, where byte and short types are converted
to integers. I think it makes sense to do the same for Arrow as well.
---
pyiceberg/io/pyarrow.py | 13 +++++++++----
tests/io/test_pyarrow_visitor.py | 12 ++++++++++++
2 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 904fab2..1b14771 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -853,10 +853,15 @@ class _ConvertToIceberg(PyArrowSchemaVisitor[Union[IcebergType, Schema]]):
def primitive(self, primitive: pa.DataType) -> PrimitiveType:
if pa.types.is_boolean(primitive):
return BooleanType()
- elif pa.types.is_int32(primitive):
- return IntegerType()
- elif pa.types.is_int64(primitive):
- return LongType()
+ elif pa.types.is_integer(primitive):
+ width = primitive.bit_width
+ if width <= 32:
+ return IntegerType()
+ elif width <= 64:
+ return LongType()
+ else:
+ # Does not exist (yet)
+ raise TypeError(f"Unsupported integer type: {primitive}")
elif pa.types.is_float32(primitive):
return FloatType()
elif pa.types.is_float64(primitive):
diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py
index c30a53a..c6ba18c 100644
--- a/tests/io/test_pyarrow_visitor.py
+++ b/tests/io/test_pyarrow_visitor.py
@@ -84,6 +84,18 @@ def test_pyarrow_boolean_to_iceberg() -> None:
assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type
+def test_pyarrow_int8_to_iceberg() -> None:
+ pyarrow_type = pa.int8()
+ converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
+ assert converted_iceberg_type == IntegerType()
+
+
+def test_pyarrow_int16_to_iceberg() -> None:
+ pyarrow_type = pa.int16()
+ converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
+ assert converted_iceberg_type == IntegerType()
+
+
def test_pyarrow_int32_to_iceberg() -> None:
pyarrow_type = pa.int32()
converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())