westonpace commented on a change in pull request #10628:
URL: https://github.com/apache/arrow/pull/10628#discussion_r663270720



##########
File path: python/pyarrow/_dataset.pyx
##########
@@ -3025,14 +3072,68 @@ def _filesystemdataset_write(
         CFileSystemDatasetWriteOptions c_options
         shared_ptr[CScanner] c_scanner
         vector[shared_ptr[CRecordBatch]] c_batches
+        dict visit_args
+        function[cb_writer_finish] c_post_finish_cb
 
     c_options.file_write_options = file_options.unwrap()
     c_options.filesystem = filesystem.unwrap()
     c_options.base_dir = tobytes(_stringify_path(base_dir))
     c_options.partitioning = partitioning.unwrap()
     c_options.max_partitions = max_partitions
     c_options.basename_template = tobytes(basename_template)
+    c_post_finish_cb = _filesystemdataset_write_visitor
+    if file_visitor is not None:
+        visit_args = {'base_dir': c_options.base_dir,
+                      'file_visitor': file_visitor}
+        c_options.writer_post_finish = BindFunction[cb_writer_finish_internal](
+            &_filesystemdataset_write_visitor, visit_args)
 
     c_scanner = data.unwrap()
     with nogil:
         check_status(CFileSystemDataset.Write(c_options, c_scanner))
+
+
+# basic test to roundtrip through a BoundFunction

Review comment:
       I just got rid of this.  It came along with your experiment and I don't 
know that we really need it.  Now that BindFunction is actually used we test it 
indirectly that way.

##########
File path: python/pyarrow/_dataset.pyx
##########
@@ -3009,6 +3009,52 @@ def _get_partition_keys(Expression partition_expression):
     return out
 
 
+ctypedef CParquetFileWriter* _CParquetFileWriterPtr
+
+cdef class WrittenFile(_Weakrefable):
+    """
+    Metadata information about files written as
+    part of a dataset write operation
+    """
+
+    """The full path to the created file"""
+    cdef public str path
+    """If the file is a parquet file this will contain the parquet metadata"""
+    cdef public object metadata
+
+    def __init__(self, path, metadata):
+        self.path = path
+        self.metadata = metadata
+
+cdef void _filesystemdataset_write_visitor(
+        dict visit_args,
+        CFileWriter* file_writer):
+    cdef:
+        str path
+        str base_dir
+        WrittenFile written_file
+        FileMetaData parquet_metadata
+        CParquetFileWriter* parquet_file_writer
+
+    if file_writer == nullptr:
+        return
+
+    parquet_metadata = None
+    path = frombytes(deref(file_writer).destination().path)
+    base_dir = frombytes(visit_args['base_dir'])

Review comment:
       Done.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to