This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new a7794ca Arrow: Support Arrow large-string (#382)
a7794ca is described below
commit a7794cad8e09c43f26e7be627c4e999d330c6cec
Author: Fokko Driesprong <[email protected]>
AuthorDate: Wed Feb 7 14:30:15 2024 +0100
Arrow: Support Arrow large-string (#382)
---
pyiceberg/io/pyarrow.py | 2 +-
tests/io/test_pyarrow_visitor.py | 9 +++++++++
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 91d8452..904fab2 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -864,7 +864,7 @@ class _ConvertToIceberg(PyArrowSchemaVisitor[Union[IcebergType, Schema]]):
elif isinstance(primitive, pa.Decimal128Type):
primitive = cast(pa.Decimal128Type, primitive)
return DecimalType(primitive.precision, primitive.scale)
- elif pa.types.is_string(primitive):
+ elif pa.types.is_string(primitive) or pa.types.is_large_string(primitive):
return StringType()
elif pa.types.is_date32(primitive):
return DateType()
diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py
index c7f364b..c30a53a 100644
--- a/tests/io/test_pyarrow_visitor.py
+++ b/tests/io/test_pyarrow_visitor.py
@@ -272,6 +272,15 @@ def test_round_schema_conversion_nested(table_schema_nested: Schema) -> None:
assert actual == expected
+def test_round_schema_large_string() -> None:
+ schema = pa.schema([pa.field("animals", pa.large_string())])
+ actual = str(pyarrow_to_schema(schema, name_mapping=NameMapping([MappedField(field_id=1, names=["animals"])])))
+ expected = """table {
+ 1: animals: optional string
+}"""
+ assert actual == expected
+
+
def test_simple_schema_has_missing_ids() -> None:
schema = pa.schema([
pa.field('foo', pa.string(), nullable=False),