klion26 commented on code in PR #8031:
URL: https://github.com/apache/arrow-rs/pull/8031#discussion_r2250449783


##########
parquet-variant/src/builder.rs:
##########
@@ -1462,49 +1441,36 @@ impl<'a> ObjectBuilder<'a> {
         let num_fields = self.fields.len();
         let is_large = num_fields > u8::MAX as usize;
 
-        let header_size = 1 + // header byte
-            (if is_large { 4 } else { 1 }) + // num_fields
-            (num_fields * id_size as usize) + // field IDs
-            ((num_fields + 1) * offset_size as usize); // field offsets + 
data_size
+        let num_fileds_size = if is_large { 4 } else { 1 }; // is_large: 4 
bytes, else 1 byte.
 
-        let starting_offset = self.parent_value_offset_base;
+        let num_fields_bytes = num_fields.to_le_bytes();
+        let num_elements_bytes = 
num_fields_bytes.iter().take(num_fileds_size).copied();
 
-        // Shift existing data to make room for the header
-        let buffer = parent_buffer.inner_mut();
-        buffer.splice(
-            starting_offset..starting_offset,
-            std::iter::repeat_n(0u8, header_size),
+        let fields = PackedU32Iterator::new(
+            id_size as usize,
+            self.fields.keys().map(|offset| offset.to_le_bytes()),
+        );
+        let offsets = PackedU32Iterator::new(
+            offset_size as usize,
+            self.fields
+                .values()
+                .map(|offset| (*offset as u32).to_le_bytes()),
         );
 
-        // Write header at the original start position
-        let mut header_pos = starting_offset;
-
-        // Write header byte
+        let data_size_bytes = (data_size as u32).to_le_bytes();
+        let data_size_bytes_iter = data_size_bytes.iter().take(offset_size as 
usize).copied();
         let header = object_header(is_large, id_size, offset_size);
+        let bytess_to_splice = std::iter::once(header)
+            .chain(num_elements_bytes)
+            .chain(fields)
+            .chain(offsets)
+            .chain(data_size_bytes_iter);
+
+        let starting_offset = self.parent_value_offset_base;
+        // Shift existing data to make room for the header
+        let buffer = parent_buffer.inner_mut();
+        buffer.splice(starting_offset..starting_offset, bytess_to_splice);
 
-        header_pos = self
-            .parent_state
-            .buffer()
-            .append_header_start_from_buf_pos(header_pos, header, is_large, 
num_fields);
-
-        header_pos = self
-            .parent_state
-            .buffer()
-            .append_offset_array_start_from_buf_pos(

Review Comment:
   Will change the function name after this pr finalized.



##########
parquet-variant/src/builder.rs:
##########
@@ -64,19 +64,55 @@ fn write_offset(buf: &mut Vec<u8>, value: usize, nbytes: 
u8) {
     buf.extend_from_slice(&bytes[..nbytes as usize]);
 }
 
-/// Write little-endian integer to buffer at a specific position
-fn write_offset_at_pos(buf: &mut [u8], start_pos: usize, value: usize, nbytes: 
u8) {
-    let bytes = value.to_le_bytes();
-    buf[start_pos..start_pos + nbytes as 
usize].copy_from_slice(&bytes[..nbytes as usize]);
-}
-
 /// Append `value_size` bytes of given `value` into `dest`.
 fn append_packed_u32(dest: &mut Vec<u8>, value: u32, value_size: usize) {
     let n = dest.len() + value_size;
     dest.extend(value.to_le_bytes());
     dest.truncate(n);
 }
 
+/// An iterator that yields the bytes of a packed u32 iterator.
+/// Will yield the first `packed_bytes` bytes of each item in the iterator.
+struct PackedU32Iterator<T: Iterator<Item = [u8; 4]>> {
+    packed_bytes: usize,
+    iterator: T,
+    current_item: [u8; 4],
+    current_byte: usize, // 0..3
+}
+
+impl<T: Iterator<Item = [u8; 4]>> PackedU32Iterator<T> {
+    fn new(packed_bytes: usize, iterator: T) -> Self {

Review Comment:
   Thanks for pointing it out, adding assert is indeed a better behavior



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to