This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 9633f14f8d add finish to AsyncArrowWriter with test (#6543)
9633f14f8d is described below

commit 9633f14f8d9e5b9b8fdf3514b7acb77217b30584
Author: Ed Seidl <[email protected]>
AuthorDate: Sat Oct 12 07:24:48 2024 -0700

    add finish to AsyncArrowWriter with test (#6543)
---
 parquet/src/arrow/async_writer/mod.rs | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/parquet/src/arrow/async_writer/mod.rs b/parquet/src/arrow/async_writer/mod.rs
index 50bb5c0463..8155b57d9a 100644
--- a/parquet/src/arrow/async_writer/mod.rs
+++ b/parquet/src/arrow/async_writer/mod.rs
@@ -239,7 +239,11 @@ impl<W: AsyncFileWriter> AsyncArrowWriter<W> {
     /// Close and finalize the writer.
     ///
     /// All the data in the inner buffer will be force flushed.
-    pub async fn close(mut self) -> Result<FileMetaData> {
+    ///
+    /// Unlike [`Self::close`] this does not consume self
+    ///
+    /// Attempting to write after calling finish will result in an error
+    pub async fn finish(&mut self) -> Result<FileMetaData> {
         let metadata = self.sync_writer.finish()?;
 
         // Force to flush the remaining data.
@@ -249,6 +253,13 @@ impl<W: AsyncFileWriter> AsyncArrowWriter<W> {
         Ok(metadata)
     }
 
+    /// Close and finalize the writer.
+    ///
+    /// All the data in the inner buffer will be force flushed.
+    pub async fn close(mut self) -> Result<FileMetaData> {
+        self.finish().await
+    }
+
     /// Flush the data written by `sync_writer` into the `async_writer`
     ///
     /// # Notes
@@ -385,6 +396,28 @@ mod tests {
         }
     }
 
+    #[tokio::test]
+    async fn test_async_writer_bytes_written() {
+        let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
+        let to_write = RecordBatch::try_from_iter([("col", col)]).unwrap();
+
+        let temp = tempfile::tempfile().unwrap();
+
+        let file = tokio::fs::File::from_std(temp.try_clone().unwrap());
+        let mut writer =
+            AsyncArrowWriter::try_new(file.try_clone().await.unwrap(), to_write.schema(), None)
+                .unwrap();
+        writer.write(&to_write).await.unwrap();
+        let _metadata = writer.finish().await.unwrap();
+        // After `finish` this should include the metadata and footer
+        let reported = writer.bytes_written();
+
+        // Get actual size from file metadata
+        let actual = file.metadata().await.unwrap().len() as usize;
+
+        assert_eq!(reported, actual);
+    }
+
     #[tokio::test]
     async fn test_async_writer_file() {
         let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;

Reply via email to