sdf-jkl commented on code in PR #8354:
URL: https://github.com/apache/arrow-rs/pull/8354#discussion_r2843510814
##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -42,16 +45,84 @@ pub(crate) enum ShreddedPathStep<'a> {
NotShredded,
}
+fn take_list_index_as_shredding_state<O: OffsetSizeTrait>(
+ list_array: &GenericListArray<O>,
+ index: usize,
+ cast_options: &CastOptions,
+) -> Result<Option<ShreddingState>> {
+ let offsets = list_array.offsets();
+ let values = list_array.values();
+
+ let Some(struct_array) = values.as_any().downcast_ref::<StructArray>()
else {
+ return Ok(None);
+ };
+
+ let value_array = struct_array.column_by_name("value");
+ let typed_array = struct_array.column_by_name("typed_value");
+
+ // If list elements have neither typed nor fallback value, this path step
is missing.
+ if value_array.is_none() && typed_array.is_none() {
+ return Ok(None);
+ }
+
+ let mut take_indices = Vec::with_capacity(list_array.len());
+ for row in 0..list_array.len() {
+ let start = offsets[row].as_usize();
+ let end = offsets[row + 1].as_usize();
+ let len = end - start;
+
+ if index < len {
+ let absolute_index = start.checked_add(index).ok_or_else(|| {
+ ArrowError::ComputeError("List index overflow while building
take indices".into())
+ })?;
+ let absolute_index = u64::try_from(absolute_index)
+ .map_err(|_| ArrowError::ComputeError("List index does not fit
into u64".into()))?;
+ take_indices.push(Some(absolute_index));
+ } else if cast_options.safe {
+ take_indices.push(None);
+ } else {
+ return Err(ArrowError::CastError(format!(
+ "Cannot access index '{}' for row {} with list length {}",
+ index, row, len
+ )));
Review Comment:
I support following the Spark semantics too.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]