This is an automated email from the ASF dual-hosted git repository.
kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 5c68ad81 Support `string_view` and `binary_view` (#1748)
5c68ad81 is described below
commit 5c68ad81d144f6ab1855807fd3a133e944f9b0a6
Author: Fokko Driesprong <[email protected]>
AuthorDate: Tue Mar 4 00:58:35 2025 +0100
Support `string_view` and `binary_view` (#1748)
Resolves #1745
---
pyiceberg/io/pyarrow.py | 4 ++--
tests/io/test_pyarrow_visitor.py | 12 ++++++------
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index bf16ec5e..eab26b0c 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1189,7 +1189,7 @@ class _ConvertToIceberg(PyArrowSchemaVisitor[Union[IcebergType, Schema]]):
elif isinstance(primitive, pa.Decimal128Type):
primitive = cast(pa.Decimal128Type, primitive)
return DecimalType(primitive.precision, primitive.scale)
- elif pa.types.is_string(primitive) or pa.types.is_large_string(primitive):
+ elif pa.types.is_string(primitive) or pa.types.is_large_string(primitive) or pa.types.is_string_view(primitive):
return StringType()
elif pa.types.is_date32(primitive):
return DateType()
@@ -1215,7 +1215,7 @@ class _ConvertToIceberg(PyArrowSchemaVisitor[Union[IcebergType, Schema]]):
elif primitive.tz is None:
return TimestampType()
- elif pa.types.is_binary(primitive) or pa.types.is_large_binary(primitive):
+ elif pa.types.is_binary(primitive) or pa.types.is_large_binary(primitive) or pa.types.is_binary_view(primitive):
return BinaryType()
elif pa.types.is_fixed_size_binary(primitive):
primitive = cast(pa.FixedSizeBinaryType, primitive)
diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py
index d13822f5..9f5aff3f 100644
--- a/tests/io/test_pyarrow_visitor.py
+++ b/tests/io/test_pyarrow_visitor.py
@@ -225,18 +225,18 @@ def test_pyarrow_timestamp_tz_invalid_tz() -> None:
visit_pyarrow(pyarrow_type, _ConvertToIceberg())
-def test_pyarrow_string_to_iceberg() -> None:
- pyarrow_type = pa.large_string()
+@pytest.mark.parametrize("pyarrow_type", [pa.string(), pa.large_string(), pa.string_view()])
+def test_pyarrow_string_to_iceberg(pyarrow_type: pa.DataType) -> None:
converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
assert converted_iceberg_type == StringType()
- assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type
+ assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.large_string()
-def test_pyarrow_variable_binary_to_iceberg() -> None:
- pyarrow_type = pa.large_binary()
+@pytest.mark.parametrize("pyarrow_type", [pa.binary(), pa.large_binary(), pa.binary_view()])
+def test_pyarrow_variable_binary_to_iceberg(pyarrow_type: pa.DataType) -> None:
converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
assert converted_iceberg_type == BinaryType()
- assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type
+ assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.large_binary()
def test_pyarrow_struct_to_iceberg() -> None: