This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new a7794ca  Arrow: Support Arrow large-string (#382)
a7794ca is described below

commit a7794cad8e09c43f26e7be627c4e999d330c6cec
Author: Fokko Driesprong <[email protected]>
AuthorDate: Wed Feb 7 14:30:15 2024 +0100

    Arrow: Support Arrow large-string (#382)
---
 pyiceberg/io/pyarrow.py          | 2 +-
 tests/io/test_pyarrow_visitor.py | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 91d8452..904fab2 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -864,7 +864,7 @@ class _ConvertToIceberg(PyArrowSchemaVisitor[Union[IcebergType, Schema]]):
         elif isinstance(primitive, pa.Decimal128Type):
             primitive = cast(pa.Decimal128Type, primitive)
             return DecimalType(primitive.precision, primitive.scale)
-        elif pa.types.is_string(primitive):
+        elif pa.types.is_string(primitive) or pa.types.is_large_string(primitive):
             return StringType()
         elif pa.types.is_date32(primitive):
             return DateType()
diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py
index c7f364b..c30a53a 100644
--- a/tests/io/test_pyarrow_visitor.py
+++ b/tests/io/test_pyarrow_visitor.py
@@ -272,6 +272,15 @@ def test_round_schema_conversion_nested(table_schema_nested: Schema) -> None:
     assert actual == expected
 
 
+def test_round_schema_large_string() -> None:
+    schema = pa.schema([pa.field("animals", pa.large_string())])
+    actual = str(pyarrow_to_schema(schema, name_mapping=NameMapping([MappedField(field_id=1, names=["animals"])])))
+    expected = """table {
+  1: animals: optional string
+}"""
+    assert actual == expected
+
+
 def test_simple_schema_has_missing_ids() -> None:
     schema = pa.schema([
         pa.field('foo', pa.string(), nullable=False),

Reply via email to