scovich commented on code in PR #8354:
URL: https://github.com/apache/arrow-rs/pull/8354#discussion_r2842790603


##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -42,16 +45,84 @@ pub(crate) enum ShreddedPathStep<'a> {
     NotShredded,
 }
 
+fn take_list_index_as_shredding_state<O: OffsetSizeTrait>(
+    list_array: &GenericListArray<O>,
+    index: usize,
+    cast_options: &CastOptions,
+) -> Result<Option<ShreddingState>> {
+    let offsets = list_array.offsets();
+    let values = list_array.values();
+
+    let Some(struct_array) = values.as_any().downcast_ref::<StructArray>() 
else {
+        return Ok(None);
+    };
+
+    let value_array = struct_array.column_by_name("value");
+    let typed_array = struct_array.column_by_name("typed_value");
+
+    // If list elements have neither typed nor fallback value, this path step 
is missing.
+    if value_array.is_none() && typed_array.is_none() {
+        return Ok(None);
+    }

Review Comment:
   How does this relate to the VariantArray helper? (should we use it)?



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -42,16 +45,84 @@ pub(crate) enum ShreddedPathStep<'a> {
     NotShredded,
 }
 
+fn take_list_index_as_shredding_state<O: OffsetSizeTrait>(
+    list_array: &GenericListArray<O>,
+    index: usize,
+    cast_options: &CastOptions,
+) -> Result<Option<ShreddingState>> {
+    let offsets = list_array.offsets();
+    let values = list_array.values();
+
+    let Some(struct_array) = values.as_any().downcast_ref::<StructArray>() 
else {
+        return Ok(None);
+    };
+
+    let value_array = struct_array.column_by_name("value");
+    let typed_array = struct_array.column_by_name("typed_value");
+
+    // If list elements have neither typed nor fallback value, this path step 
is missing.
+    if value_array.is_none() && typed_array.is_none() {
+        return Ok(None);
+    }
+
+    let mut take_indices = Vec::with_capacity(list_array.len());
+    for row in 0..list_array.len() {
+        let start = offsets[row].as_usize();
+        let end = offsets[row + 1].as_usize();
+        let len = end - start;
+
+        if index < len {
+            let absolute_index = start.checked_add(index).ok_or_else(|| {
+                ArrowError::ComputeError("List index overflow while building 
take indices".into())
+            })?;
+            let absolute_index = u64::try_from(absolute_index)
+                .map_err(|_| ArrowError::ComputeError("List index does not fit 
into u64".into()))?;
+            take_indices.push(Some(absolute_index));
+        } else if cast_options.safe {
+            take_indices.push(None);
+        } else {
+            return Err(ArrowError::CastError(format!(
+                "Cannot access index '{}' for row {} with list length {}",
+                index, row, len
+            )));
+        }
+    }
+
+    let index_array = UInt64Array::from(take_indices);
+
+    // Gather both typed and fallback values at the requested element index.
+    let taken_value = value_array
+        .map(|value| take(value, &index_array, None))

Review Comment:
   aside: TIL about 
[take](https://docs.rs/arrow/latest/arrow/compute/fn.take.html). Very helpful 
here.



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -42,16 +45,84 @@ pub(crate) enum ShreddedPathStep<'a> {
     NotShredded,
 }
 
+fn take_list_index_as_shredding_state<O: OffsetSizeTrait>(
+    list_array: &GenericListArray<O>,
+    index: usize,
+    cast_options: &CastOptions,
+) -> Result<Option<ShreddingState>> {
+    let offsets = list_array.offsets();
+    let values = list_array.values();
+
+    let Some(struct_array) = values.as_any().downcast_ref::<StructArray>() 
else {
+        return Ok(None);
+    };
+
+    let value_array = struct_array.column_by_name("value");
+    let typed_array = struct_array.column_by_name("typed_value");
+
+    // If list elements have neither typed nor fallback value, this path step 
is missing.
+    if value_array.is_none() && typed_array.is_none() {
+        return Ok(None);
+    }
+
+    let mut take_indices = Vec::with_capacity(list_array.len());
+    for row in 0..list_array.len() {
+        let start = offsets[row].as_usize();
+        let end = offsets[row + 1].as_usize();
+        let len = end - start;
+
+        if index < len {
+            let absolute_index = start.checked_add(index).ok_or_else(|| {
+                ArrowError::ComputeError("List index overflow while building 
take indices".into())
+            })?;
+            let absolute_index = u64::try_from(absolute_index)
+                .map_err(|_| ArrowError::ComputeError("List index does not fit 
into u64".into()))?;
+            take_indices.push(Some(absolute_index));
+        } else if cast_options.safe {
+            take_indices.push(None);
+        } else {
+            return Err(ArrowError::CastError(format!(
+                "Cannot access index '{}' for row {} with list length {}",
+                index, row, len
+            )));

Review Comment:
   We need to decide what the out of bounds semantics should be. For example, 
spark just returns NULL.
   
   By way of comparison, spark and arrow-rs both return NULL for non-existent 
struct fields, which could be argued as analogous. Or maybe it's considered 
different and we want the error. Or maybe missing struct fields are also 
handled wrong?
   
   (I'm comfortable with following spark semantics, but would love to hear 
others' thoughts)



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -96,15 +167,35 @@ pub(crate) fn follow_shredded_path_element<'a>(
                 ))
             })?;
 
-            let state = BorrowedShreddingState::try_from(struct_array)?;
+            let state = ShreddingState::try_from(struct_array)?;
             Ok(ShreddedPathStep::Success(state))
         }
-        VariantPathElement::Index { .. } => {
-            // TODO: Support array indexing. Among other things, it will 
require slicing not
-            // only the array we have here, but also the corresponding 
metadata and null masks.
-            Err(ArrowError::NotYetImplemented(
-                "Pathing into shredded variant array index".into(),
-            ))
+        VariantPathElement::Index { index } => {
+            let state = if let Some(list_array) =
+                typed_value.as_any().downcast_ref::<GenericListArray<i32>>()
+            {
+                take_list_index_as_shredding_state(list_array, *index, 
cast_options)?
+            } else if let Some(list_array) =
+                typed_value.as_any().downcast_ref::<GenericListArray<i64>>()
+            {
+                take_list_index_as_shredding_state(list_array, *index, 
cast_options)?
+            } else {
+                // Downcast failure - if strict cast options are enabled, this 
should be an error
+                if !cast_options.safe {
+                    return Err(ArrowError::CastError(format!(

Review Comment:
   What about list view arrays?



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -96,15 +167,35 @@ pub(crate) fn follow_shredded_path_element<'a>(
                 ))
             })?;
 
-            let state = BorrowedShreddingState::try_from(struct_array)?;
+            let state = ShreddingState::try_from(struct_array)?;
             Ok(ShreddedPathStep::Success(state))
         }
-        VariantPathElement::Index { .. } => {
-            // TODO: Support array indexing. Among other things, it will 
require slicing not
-            // only the array we have here, but also the corresponding 
metadata and null masks.
-            Err(ArrowError::NotYetImplemented(
-                "Pathing into shredded variant array index".into(),
-            ))
+        VariantPathElement::Index { index } => {
+            let state = if let Some(list_array) =
+                typed_value.as_any().downcast_ref::<GenericListArray<i32>>()
+            {
+                take_list_index_as_shredding_state(list_array, *index, 
cast_options)?
+            } else if let Some(list_array) =
+                typed_value.as_any().downcast_ref::<GenericListArray<i64>>()
+            {
+                take_list_index_as_shredding_state(list_array, *index, 
cast_options)?
+            } else {
+                // Downcast failure - if strict cast options are enabled, this 
should be an error
+                if !cast_options.safe {
+                    return Err(ArrowError::CastError(format!(

Review Comment:
   Also, why not follow the same else-if chain as above?
   ```rust
   } else if cast_options.safe {
       return Ok(...);
   } else {
      return Err(...);
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to