This is an automated email from the ASF dual-hosted git repository.

timsaucer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 494b89a  refactor: from_arrow (#917)
494b89a is described below

commit 494b89a522541bbaf9c3cd5d7b6bd7ab7218a399
Author: Ion Koutsouris <[email protected]>
AuthorDate: Tue Oct 15 13:21:30 2024 +0200

    refactor: from_arrow (#917)
---
 python/datafusion/context.py | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/python/datafusion/context.py b/python/datafusion/context.py
index 957d7e3..5221c86 100644
--- a/python/datafusion/context.py
+++ b/python/datafusion/context.py
@@ -30,7 +30,7 @@ from datafusion.expr import Expr, SortExpr, sort_list_to_raw_sort_list
 from datafusion.record_batch import RecordBatchStream
 from datafusion.udf import ScalarUDF, AggregateUDF, WindowUDF
 
-from typing import Any, TYPE_CHECKING
+from typing import Any, TYPE_CHECKING, Protocol
 from typing_extensions import deprecated
 
 if TYPE_CHECKING:
@@ -41,6 +41,28 @@ if TYPE_CHECKING:
     from datafusion.plan import LogicalPlan, ExecutionPlan
 
 
+class ArrowStreamExportable(Protocol):
+    """Type hint for object exporting Arrow C Stream via Arrow PyCapsule Interface.
+
+    https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+    """
+
+    def __arrow_c_stream__(  # noqa: D105
+        self, requested_schema: object | None = None
+    ) -> object: ...
+
+
+class ArrowArrayExportable(Protocol):
+    """Type hint for object exporting Arrow C Array via Arrow PyCapsule Interface.
+
+    https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+    """
+
+    def __arrow_c_array__(  # noqa: D105
+        self, requested_schema: object | None = None
+    ) -> tuple[object, object]: ...
+
+
 class SessionConfig:
     """Session configuration options."""
 
@@ -592,12 +614,18 @@ class SessionContext:
         """
         return DataFrame(self.ctx.from_pydict(data, name))
 
-    def from_arrow(self, data: Any, name: str | None = None) -> DataFrame:
+    def from_arrow(
+        self,
+        data: ArrowStreamExportable | ArrowArrayExportable,
+        name: str | None = None,
     ) -> DataFrame:
         """Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow source.
 
         The Arrow data source can be any object that implements either
         ``__arrow_c_stream__`` or ``__arrow_c_array__``. For the latter, it must return
-        a struct array. Common examples of sources from pyarrow include
+        a struct array.
+
+        Arrow data can be Polars, Pandas, Pyarrow etc.
 
         Args:
             data: Arrow data source.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to