rdblue commented on code in PR #5949:
URL: https://github.com/apache/iceberg/pull/5949#discussion_r996211250
##########
python/pyiceberg/io/pyarrow.py:
##########
@@ -239,3 +266,102 @@ def delete(self, location: Union[str, InputFile,
OutputFile]) -> None:
elif e.errno == 13 or "AWS Error [code 15]" in str(e):
raise PermissionError(f"Cannot delete file, access denied:
{location}") from e
raise # pragma: no cover - If some other kind of OSError, raise
the raw error
+
+
+def convert_iceberg_schema_to_pyarrow(schema: Schema) -> pa.schema:
+ return visit(schema, _ConvertToArrowSchema())
+
+
+class _ConvertToArrowSchema(SchemaVisitor[pa.DataType]):
+ def schema(self, _: Schema, struct_result: pa.StructType) -> pa.schema:
+ return pa.schema(list(struct_result))
+
+ def struct(self, _: StructType, field_results: List[pa.DataType]) ->
pa.DataType:
+ return pa.struct(field_results)
+
+ def field(self, field: NestedField, field_result: pa.DataType) -> pa.Field:
+ return pa.field(
+ name=field.name, type=field_result, nullable=not field.required,
metadata={"doc": field.doc} if field.doc else {}
+ )
+
+ def list(self, _: ListType, element_result: pa.DataType) -> pa.DataType:
+ return pa.list_(value_type=element_result)
+
+ def map(self, _: MapType, key_result: pa.DataType, value_result:
pa.DataType) -> pa.DataType:
+ return pa.map_(key_type=key_result, item_type=value_result)
+
+ def primitive(self, primitive: PrimitiveType) -> pa.DataType:
+ return _iceberg_to_pyarrow_type(primitive)
+
+
+@singledispatch
+def _iceberg_to_pyarrow_type(primitive: PrimitiveType) -> pa.DataType:
+ raise ValueError(f"Unknown type: {primitive}")
+
+
+@_iceberg_to_pyarrow_type.register
+def _(primitive: FixedType) -> pa.DataType:
+ return pa.binary(primitive.length)
+
+
+@_iceberg_to_pyarrow_type.register
+def _(primitive: DecimalType) -> pa.DataType:
+ return pa.decimal128(primitive.precision, primitive.scale)
+
+
+@_iceberg_to_pyarrow_type.register
+def _(_: BooleanType) -> pa.DataType:
+ return pa.bool_()
+
+
+@_iceberg_to_pyarrow_type.register
+def _(_: IntegerType) -> pa.DataType:
+ return pa.int32()
+
+
+@_iceberg_to_pyarrow_type.register
+def _(_: LongType) -> pa.DataType:
+ return pa.int64()
+
+
+@_iceberg_to_pyarrow_type.register
+def _(_: FloatType) -> pa.DataType:
+ # 32-bit IEEE 754 floating point
+ return pa.float32()
+
+
+@_iceberg_to_pyarrow_type.register
+def _(_: DoubleType) -> pa.DataType:
+ # 64-bit IEEE 754 floating point
+ return pa.float64()
+
+
+@_iceberg_to_pyarrow_type.register
+def _(_: DateType) -> pa.DataType:
+ # Date encoded as an int
+ return pa.date32()
+
+
+@_iceberg_to_pyarrow_type.register
+def _(_: TimeType) -> pa.DataType:
+ return pa.time64(unit="ms")
+
+
+@_iceberg_to_pyarrow_type.register
+def _(_: TimestampType) -> pa.DataType:
+ return pa.timestamp(unit="ms")
+
+
+@_iceberg_to_pyarrow_type.register
+def _(_: TimestamptzType) -> pa.DataType:
+ return pa.timestamp(unit="ms")
Review Comment:
Should this use `tz="+00:00"`?
According to the docs, `tz=None` indicates a time-zone-naive type, but these
are specifically timestamps in UTC+0.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]