Dandandan commented on code in PR #7564: URL: https://github.com/apache/arrow-rs/pull/7564#discussion_r2113628471
##########
arrow-row/src/lib.rs:
##########
@@ -1173,88 +1148,180 @@ fn null_sentinel(options: SortOptions) -> u8 {
     }
 }
 
+enum LengthTracker {
+    Fixed {
+        length: usize,
+        num_rows: usize,
+    },
+    Variable {
+        fixed_length: usize,
+        lengths: Vec<usize>,
+    }
+}
+
+impl LengthTracker {
+    fn new(num_rows: usize) -> Self {
+        Self::Fixed { length: 0, num_rows }
+    }
+
+    fn push_fixed(&mut self, new_length: usize) {
+        match self {
+            LengthTracker::Fixed { length, .. } => *length += new_length,
+            LengthTracker::Variable { fixed_length, .. } => *fixed_length += new_length,
+        }
+    }
+
+    fn push_variable(&mut self, new_lengths: impl ExactSizeIterator<Item = usize>) {
+        match self {
+            LengthTracker::Fixed { length, .. } => {
+                // todo: avoid materialization if all items of new_lengths are same
+                *self = LengthTracker::Variable { fixed_length: *length, lengths: new_lengths.collect() }
+            },
+            LengthTracker::Variable { lengths, .. } => {
+                assert_eq!(lengths.len(), new_lengths.len());
+                lengths.iter_mut().zip(new_lengths.into_iter()).for_each(|(length, new_length)| *length += new_length);
+            }
+        }
+    }
+
+    fn materialized(&mut self) -> &mut [usize] {
+        if let LengthTracker::Fixed { length, num_rows } = *self {
+            *self = LengthTracker::Variable {
+                fixed_length: length,
+                lengths: vec![0; num_rows]
+            };
+        }
+
+        match self {
+            LengthTracker::Variable { lengths, .. } => lengths,
+            LengthTracker::Fixed { .. } => unreachable!(),
+        }
+    }
+
+    // We initialize the offsets shifted down by one row index.
+    //
+    // As the rows are appended to the offsets will be incremented to match
+    //
+    // For example, consider the case of 3 rows of length 3, 4, and 6 respectively.
+    // The offsets would be initialized to `0, 0, 3, 7`
+    //
+    // Writing the first row entirely would yield `0, 3, 3, 7`
+    // The second, `0, 3, 7, 7`
+    // The third, `0, 3, 7, 13`
+    //
+    // This would be the final offsets for reading
+    //
+    // In this way offsets tracks the position during writing whilst eventually serving
+    fn extend_offsets(&self, initial_offset: usize, offsets: &mut Vec<usize>) -> usize {
+        match self {
+            LengthTracker::Fixed { length, num_rows } => {
+                offsets.extend(
+                    (0..*num_rows).map(|i| initial_offset + i * length)
+                );
+
+                initial_offset + num_rows * length
+            },
+            LengthTracker::Variable { fixed_length, lengths } => {
+                offsets.reserve(lengths.len());
+
+                let mut acc = initial_offset;
+                for length in lengths {

Review Comment:
   this can use `extend` as well (then `reserve` is also not needed)

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org