This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 9633f14f8d add finish to AsyncArrowWriter with test (#6543)
9633f14f8d is described below
commit 9633f14f8d9e5b9b8fdf3514b7acb77217b30584
Author: Ed Seidl <[email protected]>
AuthorDate: Sat Oct 12 07:24:48 2024 -0700
add finish to AsyncArrowWriter with test (#6543)
---
parquet/src/arrow/async_writer/mod.rs | 35 ++++++++++++++++++++++++++++++++++-
1 file changed, 34 insertions(+), 1 deletion(-)
diff --git a/parquet/src/arrow/async_writer/mod.rs b/parquet/src/arrow/async_writer/mod.rs
index 50bb5c0463..8155b57d9a 100644
--- a/parquet/src/arrow/async_writer/mod.rs
+++ b/parquet/src/arrow/async_writer/mod.rs
@@ -239,7 +239,11 @@ impl<W: AsyncFileWriter> AsyncArrowWriter<W> {
/// Close and finalize the writer.
///
/// All the data in the inner buffer will be force flushed.
- pub async fn close(mut self) -> Result<FileMetaData> {
+ ///
+ /// Unlike [`Self::close`] this does not consume self
+ ///
+ /// Attempting to write after calling finish will result in an error
+ pub async fn finish(&mut self) -> Result<FileMetaData> {
let metadata = self.sync_writer.finish()?;
// Force to flush the remaining data.
@@ -249,6 +253,13 @@ impl<W: AsyncFileWriter> AsyncArrowWriter<W> {
Ok(metadata)
}
+ /// Close and finalize the writer.
+ ///
+ /// All the data in the inner buffer will be force flushed.
+ pub async fn close(mut self) -> Result<FileMetaData> {
+ self.finish().await
+ }
+
/// Flush the data written by `sync_writer` into the `async_writer`
///
/// # Notes
@@ -385,6 +396,28 @@ mod tests {
}
}
+ #[tokio::test]
+ async fn test_async_writer_bytes_written() {
+ let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
+ let to_write = RecordBatch::try_from_iter([("col", col)]).unwrap();
+
+ let temp = tempfile::tempfile().unwrap();
+
+ let file = tokio::fs::File::from_std(temp.try_clone().unwrap());
+ let mut writer =
+ AsyncArrowWriter::try_new(file.try_clone().await.unwrap(), to_write.schema(), None)
+ .unwrap();
+ writer.write(&to_write).await.unwrap();
+ let _metadata = writer.finish().await.unwrap();
+ // After `finish` this should include the metadata and footer
+ let reported = writer.bytes_written();
+
+ // Get actual size from file metadata
+ let actual = file.metadata().await.unwrap().len() as usize;
+
+ assert_eq!(reported, actual);
+ }
+
#[tokio::test]
async fn test_async_writer_file() {
let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;