This is an automated email from the ASF dual-hosted git repository.
timsaucer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git
The following commit(s) were added to refs/heads/main by this push:
new 494b89a refactor: from_arrow (#917)
494b89a is described below
commit 494b89a522541bbaf9c3cd5d7b6bd7ab7218a399
Author: Ion Koutsouris <[email protected]>
AuthorDate: Tue Oct 15 13:21:30 2024 +0200
refactor: from_arrow (#917)
---
python/datafusion/context.py | 34 +++++++++++++++++++++++++++++++---
1 file changed, 31 insertions(+), 3 deletions(-)
diff --git a/python/datafusion/context.py b/python/datafusion/context.py
index 957d7e3..5221c86 100644
--- a/python/datafusion/context.py
+++ b/python/datafusion/context.py
@@ -30,7 +30,7 @@ from datafusion.expr import Expr, SortExpr, sort_list_to_raw_sort_list
from datafusion.record_batch import RecordBatchStream
from datafusion.udf import ScalarUDF, AggregateUDF, WindowUDF
-from typing import Any, TYPE_CHECKING
+from typing import Any, TYPE_CHECKING, Protocol
from typing_extensions import deprecated
if TYPE_CHECKING:
@@ -41,6 +41,28 @@ if TYPE_CHECKING:
from datafusion.plan import LogicalPlan, ExecutionPlan
+class ArrowStreamExportable(Protocol):
+ """Type hint for object exporting Arrow C Stream via Arrow PyCapsule Interface.
+
+ https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+ """
+
+ def __arrow_c_stream__( # noqa: D105
+ self, requested_schema: object | None = None
+ ) -> object: ...
+
+
+class ArrowArrayExportable(Protocol):
+ """Type hint for object exporting Arrow C Array via Arrow PyCapsule Interface.
+
+ https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+ """
+
+ def __arrow_c_array__( # noqa: D105
+ self, requested_schema: object | None = None
+ ) -> tuple[object, object]: ...
+
+
class SessionConfig:
"""Session configuration options."""
@@ -592,12 +614,18 @@ class SessionContext:
"""
return DataFrame(self.ctx.from_pydict(data, name))
- def from_arrow(self, data: Any, name: str | None = None) -> DataFrame:
+ def from_arrow(
+ self,
+ data: ArrowStreamExportable | ArrowArrayExportable,
+ name: str | None = None,
+ ) -> DataFrame:
"""Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow
source.
The Arrow data source can be any object that implements either
``__arrow_c_stream__`` or ``__arrow_c_array__``. For the latter, it must return
- a struct array. Common examples of sources from pyarrow include
+ a struct array.
+
+ Arrow data can be Polars, Pandas, Pyarrow etc.
Args:
data: Arrow data source.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]