This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 5c68ad81 Support `string_view` and `binary_view` (#1748)
5c68ad81 is described below

commit 5c68ad81d144f6ab1855807fd3a133e944f9b0a6
Author: Fokko Driesprong <[email protected]>
AuthorDate: Tue Mar 4 00:58:35 2025 +0100

    Support `string_view` and `binary_view` (#1748)
    
    Resolves #1745
---
 pyiceberg/io/pyarrow.py          |  4 ++--
 tests/io/test_pyarrow_visitor.py | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index bf16ec5e..eab26b0c 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1189,7 +1189,7 @@ class _ConvertToIceberg(PyArrowSchemaVisitor[Union[IcebergType, Schema]]):
         elif isinstance(primitive, pa.Decimal128Type):
             primitive = cast(pa.Decimal128Type, primitive)
             return DecimalType(primitive.precision, primitive.scale)
-        elif pa.types.is_string(primitive) or pa.types.is_large_string(primitive):
+        elif pa.types.is_string(primitive) or pa.types.is_large_string(primitive) or pa.types.is_string_view(primitive):
             return StringType()
         elif pa.types.is_date32(primitive):
             return DateType()
@@ -1215,7 +1215,7 @@ class _ConvertToIceberg(PyArrowSchemaVisitor[Union[IcebergType, Schema]]):
             elif primitive.tz is None:
                 return TimestampType()
 
-        elif pa.types.is_binary(primitive) or pa.types.is_large_binary(primitive):
+        elif pa.types.is_binary(primitive) or pa.types.is_large_binary(primitive) or pa.types.is_binary_view(primitive):
             return BinaryType()
         elif pa.types.is_fixed_size_binary(primitive):
             primitive = cast(pa.FixedSizeBinaryType, primitive)
diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py
index d13822f5..9f5aff3f 100644
--- a/tests/io/test_pyarrow_visitor.py
+++ b/tests/io/test_pyarrow_visitor.py
@@ -225,18 +225,18 @@ def test_pyarrow_timestamp_tz_invalid_tz() -> None:
         visit_pyarrow(pyarrow_type, _ConvertToIceberg())
 
 
-def test_pyarrow_string_to_iceberg() -> None:
-    pyarrow_type = pa.large_string()
+@pytest.mark.parametrize("pyarrow_type", [pa.string(), pa.large_string(), pa.string_view()])
+def test_pyarrow_string_to_iceberg(pyarrow_type: pa.DataType) -> None:
     converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
     assert converted_iceberg_type == StringType()
-    assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type
+    assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.large_string()
 
 
-def test_pyarrow_variable_binary_to_iceberg() -> None:
-    pyarrow_type = pa.large_binary()
+@pytest.mark.parametrize("pyarrow_type", [pa.binary(), pa.large_binary(), pa.binary_view()])
+def test_pyarrow_variable_binary_to_iceberg(pyarrow_type: pa.DataType) -> None:
     converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
     assert converted_iceberg_type == BinaryType()
-    assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type
+    assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.large_binary()
 
 
 def test_pyarrow_struct_to_iceberg() -> None:

Reply via email to