Jefffrey commented on code in PR #9176:
URL: https://github.com/apache/arrow-rs/pull/9176#discussion_r2704583580
##########
arrow-row/src/list.rs:
##########
@@ -323,3 +316,180 @@ pub unsafe fn decode_fixed_size_list(
builder.build_unchecked()
}))
}
+
+/// Computes the encoded length for a single list element given its child rows.
+///
+/// This is used by list types (List, LargeList, ListView, LargeListView) to determine
+/// the encoded length of a list element. For null elements, returns 1 (null sentinel only).
+/// For valid elements, returns 1 + the sum of padded lengths for each child row.
+#[inline]
+fn list_element_encoded_len(rows: &Rows, range: Option<Range<usize>>) -> usize
{
+ match range {
+ None => 1,
+ Some(range) => {
+ 1 + range
+ .map(|i|
super::variable::padded_length(Some(rows.row(i).as_ref().len())))
+ .sum::<usize>()
+ }
+ }
+}
+
+/// Computes the encoded lengths for a `GenericListViewArray`
+///
+/// `rows` should contain the encoded child elements
+pub fn compute_lengths_list_view<O: OffsetSizeTrait>(
+ lengths: &mut [usize],
+ rows: &Rows,
+ array: &GenericListViewArray<O>,
+ shift: usize,
+) {
+ let offsets = array.value_offsets();
+ let sizes = array.value_sizes();
+
+ lengths.iter_mut().enumerate().for_each(|(idx, length)| {
+ let start = offsets[idx].as_usize() - shift;
+ let size = sizes[idx].as_usize();
+ let range = array.is_valid(idx).then_some(start..start + size);
+ *length += list_element_encoded_len(rows, range);
+ });
+}
+
+/// Encodes the provided `GenericListViewArray` to `out` with the provided `SortOptions`
+///
+/// `rows` should contain the encoded child elements
+pub fn encode_list_view<O: OffsetSizeTrait>(
+ data: &mut [u8],
+ out_offsets: &mut [usize],
+ rows: &Rows,
+ opts: SortOptions,
+ array: &GenericListViewArray<O>,
+ shift: usize,
+) {
+ let offsets = array.value_offsets();
+ let sizes = array.value_sizes();
+
+ out_offsets
+ .iter_mut()
+ .skip(1)
+ .enumerate()
+ .for_each(|(idx, offset)| {
+ let start = offsets[idx].as_usize() - shift;
+ let size = sizes[idx].as_usize();
+ let range = array.is_valid(idx).then_some(start..start + size);
+ let out = &mut data[*offset..];
+ *offset += encode_one(out, rows, range, opts)
+ });
+}
+
+/// Decodes a `GenericListViewArray` from `rows` with the provided `options`
+///
+/// # Safety
+///
+/// `rows` must contain valid data for the provided `converter`
+pub unsafe fn decode_list_view<O: OffsetSizeTrait>(
+ converter: &RowConverter,
+ rows: &mut [&[u8]],
+ field: &SortField,
+ validate_utf8: bool,
+) -> Result<GenericListViewArray<O>, ArrowError> {
+ let opts = field.options;
+
+ let mut values_bytes = 0;
+
+ let mut child_count = 0usize;
+ let mut list_sizes: Vec<O> = Vec::with_capacity(rows.len());
+
+ // First pass: count children and compute sizes
+ for row in rows.iter_mut() {
+ let mut row_offset = 0;
+ let mut list_size = 0usize;
+ loop {
+ let decoded = super::variable::decode_blocks(&row[row_offset..],
opts, |x| {
+ values_bytes += x.len();
+ });
+ if decoded <= 1 {
+ list_sizes.push(O::usize_as(list_size));
+ break;
+ }
+ row_offset += decoded;
+ child_count += 1;
+ list_size += 1;
+ }
+ }
+ O::from_usize(child_count).expect("overflow");
Review Comment:
Personally I feel it makes more sense to return an error here instead of
panicking via `expect`, since the function already returns a `Result`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]