westonpace commented on a change in pull request #10628:
URL: https://github.com/apache/arrow/pull/10628#discussion_r663271097
##########
File path: python/pyarrow/_dataset.pyx
##########
@@ -3009,6 +3009,52 @@ def _get_partition_keys(Expression partition_expression):
return out
+ctypedef CParquetFileWriter* _CParquetFileWriterPtr
+
+cdef class WrittenFile(_Weakrefable):
+ """
+ Metadata information about files written as
+ part of a dataset write operation
+ """
+
+ """The full path to the created file"""
+ cdef public str path
+ """If the file is a parquet file this will contain the parquet metadata"""
+ cdef public object metadata
+
+ def __init__(self, path, metadata):
+ self.path = path
+ self.metadata = metadata
+
+cdef void _filesystemdataset_write_visitor(
+ dict visit_args,
+ CFileWriter* file_writer):
+ cdef:
+ str path
+ str base_dir
+ WrittenFile written_file
+ FileMetaData parquet_metadata
+ CParquetFileWriter* parquet_file_writer
+
+ if file_writer == nullptr:
Review comment:
Yep, I was thinking of writing a dataset to something other than a
filesystem, but there is no reason to think it would use the same method.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]