itsjunetime commented on code in PR #6690:
URL: https://github.com/apache/arrow-rs/pull/6690#discussion_r1835114076


##########
arrow-ipc/src/writer.rs:
##########
@@ -1414,44 +1314,73 @@ fn get_buffer_element_width(spec: &BufferSpec) -> usize 
{
     }
 }
 
-/// Common functionality for re-encoding offsets. Returns the new offsets as 
well as
-/// original start offset and length for use in slicing child data.
-fn reencode_offsets<O: OffsetSizeTrait>(
-    offsets: &Buffer,
-    data: &ArrayData,
-) -> (Buffer, usize, usize) {
-    let offsets_slice: &[O] = offsets.typed_data::<O>();
-    let offset_slice = &offsets_slice[data.offset()..data.offset() + 
data.len() + 1];
-
-    let start_offset = offset_slice.first().unwrap();
-    let end_offset = offset_slice.last().unwrap();
-
-    let offsets = match start_offset.as_usize() {
-        0 => offsets.clone(),
-        _ => offset_slice.iter().map(|x| *x - *start_offset).collect(),
-    };
-
-    let start_offset = start_offset.as_usize();
-    let end_offset = end_offset.as_usize();
-
-    (offsets, start_offset, end_offset - start_offset)
-}
-
-/// Returns the values and offsets [`Buffer`] for a ByteArray with offset type 
`O`
+/// Returns the offsets and values [`Buffer`]s for a ByteArray with offset 
type `O`
 ///
 /// In particular, this handles re-encoding the offsets if they don't start at 
`0`,
 /// slicing the values buffer as appropriate. This helps reduce the encoded
 /// size of sliced arrays, as values that have been sliced away are not encoded
-fn get_byte_array_buffers<O: OffsetSizeTrait>(data: &ArrayData) -> (Buffer, 
Buffer) {
+///
+/// # Panics
+///
+/// Panics if self.buffers does not contain at least 2 buffers (this code 
expects that the
+/// first will contain the offsets for this variable-length array and the 
other will contain
+/// the values)
+pub fn get_byte_array_buffers<O: OffsetSizeTrait>(data: &ArrayData) -> 
(Buffer, Buffer) {
     if data.is_empty() {
         return (MutableBuffer::new(0).into(), MutableBuffer::new(0).into());
     }
 
-    let (offsets, original_start_offset, len) = 
reencode_offsets::<O>(&data.buffers()[0], data);
+    // get the buffer of offsets, now shifted so they are shifted to be 
accurate to the slice
+    // of values that we'll be taking (e.g. if they previously said [0, 3, 5, 
7], but we slice
+    // to only get the last offset, they'll be shifted to be [0, 2], since 
that would be the
+    // offset pair for the last value in this shifted slice).
+    // also, in this example, original_start_offset would be 5 and len would 
be 2.
+    let (offsets, original_start_offset, len) = reencode_offsets::<O>(data);
     let values = data.buffers()[1].slice_with_length(original_start_offset, 
len);
     (offsets, values)
 }
 
+/// Common functionality for re-encoding offsets. Returns the new offsets as 
well as
+/// original start offset and length for use in slicing child data.
+///
+/// # Panics
+///
+/// Will panic if you call this on an `ArrayData` that does not have a buffer 
of offsets as the
+/// very first buffer (i.e. expects this to be a valid variable-length array)
+pub fn reencode_offsets<O: OffsetSizeTrait>(data: &ArrayData) -> (Buffer, 
usize, usize) {

Review Comment:
   No - thank you for catching that.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to