ntjohnson1 commented on code in PR #1474:
URL:
https://github.com/apache/datafusion-python/pull/1474#discussion_r3040715753
##########
python/datafusion/context.py:
##########
@@ -1092,6 +1112,49 @@ def register_avro(
name, str(path), schema, file_extension, table_partition_cols
)
+ def register_arrow(
+ self,
+ name: str,
+ path: str | pathlib.Path,
+ schema: pa.Schema | None = None,
+ file_extension: str = ".arrow",
+ table_partition_cols: list[tuple[str, str | pa.DataType]] | None =
None,
+ ) -> None:
+ """Register an Arrow IPC file as a table.
+
+ The registered table can be referenced from SQL statements executed
+ against this context.
+
+ Args:
+ name: Name of the table to register.
+ path: Path to the Arrow IPC file.
+ schema: The data source schema.
+ file_extension: File extension to select.
+ table_partition_cols: Partition columns.
+
+ Examples:
Review Comment:
The example doesn't demonstrate the optional arguments, but they are fairly clear.
##########
python/datafusion/context.py:
##########
@@ -1328,6 +1391,65 @@ def read_avro(
self.ctx.read_avro(str(path), schema, file_partition_cols,
file_extension)
)
+ def read_arrow(
+ self,
+ path: str | pathlib.Path,
+ schema: pa.Schema | None = None,
+ file_extension: str = ".arrow",
+ file_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
+ ) -> DataFrame:
+ """Create a :py:class:`DataFrame` for reading an Arrow IPC data source.
+
+ Args:
+ path: Path to the Arrow IPC file.
+ schema: The data source schema.
+ file_extension: File extension to select.
+ file_partition_cols: Partition columns.
+
+ Returns:
+ DataFrame representation of the read Arrow IPC file.
+
+ Examples:
Review Comment:
Same note as for register_arrow: the example doesn't demonstrate the optional arguments.
##########
conftest.py:
##########
@@ -29,6 +30,8 @@ def _doctest_namespace(doctest_namespace: dict) -> None:
"""Add common imports to the doctest namespace."""
doctest_namespace["dfn"] = dfn
doctest_namespace["np"] = np
+ doctest_namespace["pa"] = pa
Review Comment:
NIT: It would be nice to update all the existing examples to take advantage of
the new common imports, but that can be a follow-up (e.g. ask Claude to do it).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]