bkietz commented on a change in pull request #7156:
URL: https://github.com/apache/arrow/pull/7156#discussion_r426817561



##########
File path: python/pyarrow/_dataset.pyx
##########
@@ -42,6 +43,51 @@ def _forbid_instantiation(klass, subclasses_instead=True):
     raise TypeError(msg)
 
 
+ctypedef CResult[shared_ptr[CRandomAccessFile]] CCustomOpen()
+
+cdef class FileSource:
+
+    cdef:
+        # XXX why is shared_ptr necessary here? CFileSource shouldn't need it
+        CFileSource wrapped
+
+    def __cinit__(self, file, FileSystem filesystem=None):
+        cdef:
+            shared_ptr[CFileSystem] c_filesystem
+            c_string c_path
+            function[CCustomOpen] c_open
+            shared_ptr[CBuffer] c_buffer
+
+        if isinstance(file, FileSource):
+            self.wrapped = (<FileSource> file).wrapped
+
+        elif isinstance(file, Buffer):
+            c_buffer = pyarrow_unwrap_buffer(file)
+            self.wrapped = CFileSource(move(c_buffer))
+
+        elif _is_path_like(file):
+            if filesystem is None:
+                raise ValueError("cannot construct a FileSource from "
+                                 "a path without a FileSystem")
+            c_filesystem = filesystem.unwrap()
+            c_path = tobytes(_stringify_path(file))
+            self.wrapped = CFileSource(move(c_path), move(c_filesystem))
+
+        else:
+            c_open = BindMethod[CCustomOpen](
+                wrap_python_file(file, mode='r'),
+                &NativeFile.get_random_access_file)

Review comment:
       Since I can't know the semantics of an arbitrary Python file, I'm 
trying to uphold the contract of `FileSource` by remaining as lazy as possible. 
It's true that in the case of constructing a `FileSource` from a `NativeFile` 
the `RandomAccessFile` is already held open by `NativeFile`, but this isn't 
necessarily the case for all objects which could be wrapped by `NativeFile`. I 
think the potential benefit when handling costly-to-open filesystems/sources is 
worth protecting.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to