Rich-T-kid commented on code in PR #10044:
URL: https://github.com/apache/arrow-rs/pull/10044#discussion_r3350879572


##########
arrow-ipc/src/writer.rs:
##########
@@ -693,19 +734,177 @@ impl IpcDataGenerator {
             let mut message_builder = crate::MessageBuilder::new(&mut fbb);
             message_builder.add_version(write_options.metadata_version);
             
message_builder.add_header_type(crate::MessageHeader::DictionaryBatch);
-            message_builder.add_bodyLength(arrow_data.len() as i64);
+            message_builder.add_bodyLength(body_len as i64);
             message_builder.add_header(root);
             message_builder.finish()
         };
 
         fbb.finish(root, None);
         let finished_data = fbb.finished_data();
 
+        let mut arrow_data: Vec<u8> = Vec::with_capacity(body_len);
+        for encoded in &encoded_buffers {
+            arrow_data.extend_from_slice(encoded.as_slice());
+            arrow_data.extend_from_slice(
+                &PADDING[..pad_to_alignment(write_options.alignment, 
encoded.len())],
+            );
+        }
+        arrow_data.extend_from_slice(&PADDING[..tail_pad]);
+
         Ok(EncodedData {
             ipc_message: finished_data.to_vec(),
             arrow_data,
         })
     }
+
+    /// Write dictionaries and record batch's directly to `writer`, skipping 
the
+    /// intermediate `arrow_data: Vec<u8>` accumulator used by 
[`Self::record_batch_to_bytes`].
+    ///
+    /// For the uncompressed path each array buffer is held as an Arc-backed 
slice and
+    /// written straight to `writer`. one copy instead of two.  For the 
compressed path
+    /// each buffer is compressed into a per-buffer scratch `Vec<u8>` and 
written from
+    /// there, eliminating the extra copy that `write_buffer` -> `arrow_data` 
->
+    /// `write_body_buffers` would otherwise incur.
+    fn write_batch_direct<W: Write>(

Review Comment:
   This makes sense to me. The main issue is that `FileWriter` needs metadata 
while both `StreamWriter` and arrow-flight do not. It's better not to compute 
metadata the caller will not use, but the slowdown should be negligible. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to