Xuanwo commented on code in PR #5431:
URL: https://github.com/apache/arrow-rs/pull/5431#discussion_r1502302850
##########
object_store/src/buffered.rs:
##########
@@ -205,6 +205,138 @@ impl AsyncBufRead for BufReader {
}
}
+/// An async buffered writer compatible with the tokio IO traits
+///
+/// Up to `capacity` bytes will be buffered in memory, and flushed on shutdown
+/// using [`ObjectStore::put`]. If `capacity` is exceeded, data will instead be
+/// streamed using [`ObjectStore::put_multipart`]
+pub struct BufWriter {
+ capacity: usize,
+ state: BufWriterState,
+ multipart_id: Option<MultipartId>,
+ store: Arc<dyn ObjectStore>,
+}
+
+impl std::fmt::Debug for BufWriter {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("BufWriter")
+ .field("capacity", &self.capacity)
+ .field("multipart_id", &self.multipart_id)
+ .finish()
+ }
+}
+
+type MultipartResult = (MultipartId, Box<dyn AsyncWrite + Send + Unpin>);
+
+enum BufWriterState {
+ /// Buffer up to capacity bytes
+ Buffer(Path, Vec<u8>),
+ /// [`ObjectStore::put_multipart`]
+ Prepare(BoxFuture<'static, std::io::Result<MultipartResult>>),
+ /// Write to a multipart upload
+ Write(Box<dyn AsyncWrite + Send + Unpin>),
+ /// [`ObjectStore::put`]
+ Put(BoxFuture<'static, std::io::Result<()>>),
+}
+
+impl BufWriter {
+ /// Create a new [`BufWriter`] from the provided [`ObjectStore`] and [`Path`]
+ pub fn new(store: Arc<dyn ObjectStore>, path: Path) -> Self {
+ Self::with_capacity(store, path, 10 * 1024 * 1024)
+ }
+
+ /// Create a new [`BufWriter`] from the provided [`ObjectStore`], [`Path`] and `capacity`
+ pub fn with_capacity(store: Arc<dyn ObjectStore>, path: Path, capacity: usize) -> Self {
+ Self {
+ capacity,
+ store,
+ state: BufWriterState::Buffer(path, Vec::with_capacity(1024)),
Review Comment:
Is it by design to start with a `1024`B buffer?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]