pitrou commented on a change in pull request #7156:
URL: https://github.com/apache/arrow/pull/7156#discussion_r426727244
##########
File path: python/pyarrow/_dataset.pyx
##########
@@ -42,6 +43,51 @@ def _forbid_instantiation(klass, subclasses_instead=True):
raise TypeError(msg)
+ctypedef CResult[shared_ptr[CRandomAccessFile]] CCustomOpen()
+
+cdef class FileSource:
+
+ cdef:
+ # XXX why is shared_ptr necessary here? CFileSource shouldn't need it
+ CFileSource wrapped
+
+ def __cinit__(self, file, FileSystem filesystem=None):
+ cdef:
+ shared_ptr[CFileSystem] c_filesystem
+ c_string c_path
+ function[CCustomOpen] c_open
+ shared_ptr[CBuffer] c_buffer
+
+ if isinstance(file, FileSource):
+ self.wrapped = (<FileSource> file).wrapped
+
+ elif isinstance(file, Buffer):
+ c_buffer = pyarrow_unwrap_buffer(file)
+ self.wrapped = CFileSource(move(c_buffer))
+
+ elif _is_path_like(file):
+ if filesystem is None:
+ raise ValueError("cannot construct a FileSource from "
+ "a path without a FileSystem")
+ c_filesystem = filesystem.unwrap()
+ c_path = tobytes(_stringify_path(file))
+ self.wrapped = CFileSource(move(c_path), move(c_filesystem))
+
+ else:
+ c_open = BindMethod[CCustomOpen](
+ wrap_python_file(file, mode='r'),
+ &NativeFile.get_random_access_file)
Review comment:
I agree with the FileSource intent; however, here you already have a
Python file object, and you're just wrapping it in an Arrow file object, IIUC.
So I don't think lazily constructing the Arrow file object is of much value.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]