Dandandan commented on code in PR #7564:
URL: https://github.com/apache/arrow-rs/pull/7564#discussion_r2113628471


##########
arrow-row/src/lib.rs:
##########
@@ -1173,88 +1148,180 @@ fn null_sentinel(options: SortOptions) -> u8 {
     }
 }
 
+enum LengthTracker {
+    Fixed {
+        length: usize,
+        num_rows: usize,
+    },
+    Variable {
+        fixed_length: usize,
+        lengths: Vec<usize>,
+    }
+}
+
+impl LengthTracker {
+    fn new(num_rows: usize) -> Self {
+        Self::Fixed { length: 0, num_rows }
+    }
+
+    fn push_fixed(&mut self, new_length: usize) {
+        match self {
+            LengthTracker::Fixed { length, .. } => *length += new_length,
+            LengthTracker::Variable { fixed_length, .. } => *fixed_length += 
new_length,
+        }
+    }
+
+    fn push_variable(&mut self, new_lengths: impl ExactSizeIterator<Item = 
usize>) {
+        match self {
+            LengthTracker::Fixed { length, .. } => {
+                // todo: avoid materialization if all items of new_lengths are 
same
+                *self = LengthTracker::Variable { fixed_length: *length, 
lengths: new_lengths.collect() }
+            },
+            LengthTracker::Variable { lengths, .. } => {
+                assert_eq!(lengths.len(), new_lengths.len());
+                
lengths.iter_mut().zip(new_lengths.into_iter()).for_each(|(length, new_length)| 
*length += new_length);
+            }
+        }
+    }
+
+    fn materialized(&mut self) -> &mut [usize] {
+        if let LengthTracker::Fixed { length, num_rows } = *self {
+            *self = LengthTracker::Variable { 
+                fixed_length: length, 
+                lengths: vec![0; num_rows]
+            };
+        }
+        
+        match self {
+            LengthTracker::Variable { lengths, .. } => lengths,
+            LengthTracker::Fixed { .. } => unreachable!(),
+        }
+    }
+
+    // We initialize the offsets shifted down by one row index.
+    //
+    // As the rows are appended to the offsets will be incremented to match
+    //
+    // For example, consider the case of 3 rows of length 3, 4, and 6 
respectively.
+    // The offsets would be initialized to `0, 0, 3, 7`
+    //
+    // Writing the first row entirely would yield `0, 3, 3, 7`
+    // The second, `0, 3, 7, 7`
+    // The third, `0, 3, 7, 13`
+    //
+    // This would be the final offsets for reading
+    //
+    // In this way offsets tracks the position during writing whilst 
eventually serving
+    fn extend_offsets(&self, initial_offset: usize, offsets: &mut Vec<usize>) 
-> usize {
+        match self {
+            LengthTracker::Fixed { length, num_rows } => {
+                offsets.extend(
+                    (0..*num_rows).map(|i| initial_offset + i * length)
+                );
+                
+                initial_offset + num_rows * length
+            },
+            LengthTracker::Variable { fixed_length, lengths } => {
+                offsets.reserve(lengths.len());
+
+                let mut acc = initial_offset;
+                for length in lengths {

Review Comment:
   This loop can use `extend` as well (in which case the `reserve` call above is also not needed).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to