klion26 commented on code in PR #8354:
URL: https://github.com/apache/arrow-rs/pull/8354#discussion_r2853718840


##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -1848,6 +1943,246 @@ mod test {
         assert_eq!(&result, &expected);
     }
 
+    /// This test uses a pre-shredded list array and validates index-path 
access.
+    #[test]
+    fn test_shredded_list_index_access() {
+        let array = shredded_list_variant_array();
+        // Test: Extract the 0 index field as VariantArray first
+        let options = GetOptions::new_with_path(VariantPath::from(0));
+        let result = variant_get(&array, options).unwrap();
+        let result_variant = VariantArray::try_new(&result).unwrap();
+        assert_eq!(result_variant.len(), 2);
+
+        // Row 0: expect 0 index = "comedy"
+        assert_eq!(result_variant.value(0), Variant::from("comedy"));
+        // Row 1: expect 0 index = "horror"
+        assert_eq!(result_variant.value(1), Variant::from("horror"));
+    }
+
+    /// Test extracting shredded list field with type conversion.
+    #[test]
+    fn test_shredded_list_as_string() {
+        let array = shredded_list_variant_array();
+        // Test: Extract the 0 index values as StringArray (type conversion)
+        let field = Field::new("typed_value", DataType::Utf8, false);
+        let options = GetOptions::new_with_path(VariantPath::from(0))
+            .with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&array, options).unwrap();
+        // Should get StringArray
+        let expected: ArrayRef = 
Arc::new(StringArray::from(vec![Some("comedy"), Some("horror")]));
+        assert_eq!(&result, &expected);
+    }
+
+    #[test]
+    fn test_shredded_list_index_access_from_value_field() {
+        let array = shredded_list_variant_array();
+        // Index 1 maps to "drama" for row 0, and to fallback value 123 for 
row 1.
+        let options = GetOptions::new_with_path(VariantPath::from(1));
+        let result = variant_get(&array, options).unwrap();
+        let result_variant = VariantArray::try_new(&result).unwrap();
+
+        assert_eq!(result_variant.value(0), Variant::from("drama"));
+        assert_eq!(result_variant.value(1).as_int64(), Some(123));
+    }
+
+    #[test]
+    fn test_shredded_list_index_access_from_value_field_as_int64() {
+        let array = shredded_list_variant_array();
+        let field = Field::new("typed_value", DataType::Int64, true);
+        let options = GetOptions::new_with_path(VariantPath::from(1))
+            .with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&array, options).unwrap();
+
+        // "drama" -> NULL, 123 -> 123.
+        let expected: ArrayRef = Arc::new(Int64Array::from(vec![None, 
Some(123)]));
+        assert_eq!(&result, &expected);
+    }
+
+    // The tests below are temp before: 
https://github.com/apache/arrow-rs/issues/9455
+    #[test]
+    fn test_shredded_list_index_out_of_bounds_unsafe_cast_errors() {
+        let options =
+            
GetOptions::new_with_path(VariantPath::from(10)).with_cast_options(CastOptions {
+                safe: false,
+                ..Default::default()
+            });
+
+        let err = variant_get(&shredded_list_variant_array(), 
options.clone()).unwrap_err();
+        assert!(err.to_string().contains("Cannot access index '10'"));
+    }
+
+    #[test]
+    fn test_large_list_index_path_helper() {
+        let large_list = large_list_of_shredded_elements();
+        let state = take_list_index_as_shredding_state(&large_list, 1, 
&CastOptions::default())
+            .unwrap()
+            .unwrap();
+
+        let typed = state
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        let value = state.value_field().unwrap();

Review Comment:
   Do we need to assert the length of `typed/value` here?



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -42,16 +45,84 @@ pub(crate) enum ShreddedPathStep<'a> {
     NotShredded,
 }
 
+fn take_list_index_as_shredding_state<O: OffsetSizeTrait>(
+    list_array: &GenericListArray<O>,
+    index: usize,
+    cast_options: &CastOptions,
+) -> Result<Option<ShreddingState>> {
+    let offsets = list_array.offsets();
+    let values = list_array.values();
+
+    let Some(struct_array) = values.as_any().downcast_ref::<StructArray>() 
else {
+        return Ok(None);
+    };
+
+    let value_array = struct_array.column_by_name("value");
+    let typed_array = struct_array.column_by_name("typed_value");
+
+    // If list elements have neither typed nor fallback value, this path step 
is missing.
+    if value_array.is_none() && typed_array.is_none() {
+        return Ok(None);
+    }
+
+    let mut take_indices = Vec::with_capacity(list_array.len());
+    for row in 0..list_array.len() {
+        let start = offsets[row].as_usize();
+        let end = offsets[row + 1].as_usize();
+        let len = end - start;
+
+        if index < len {
+            let absolute_index = start.checked_add(index).ok_or_else(|| {
+                ArrowError::ComputeError("List index overflow while building 
take indices".into())
+            })?;
+            let absolute_index = u64::try_from(absolute_index)
+                .map_err(|_| ArrowError::ComputeError("List index does not fit 
into u64".into()))?;
+            take_indices.push(Some(absolute_index));
+        } else if cast_options.safe {
+            take_indices.push(None);
+        } else {
+            return Err(ArrowError::CastError(format!(
+                "Cannot access index '{}' for row {} with list length {}",
+                index, row, len
+            )));

Review Comment:
   Do we need to add the behavior to the document or somewhere else?



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -1848,6 +1943,246 @@ mod test {
         assert_eq!(&result, &expected);
     }
 
+    /// This test uses a pre-shredded list array and validates index-path 
access.
+    #[test]
+    fn test_shredded_list_index_access() {
+        let array = shredded_list_variant_array();
+        // Test: Extract the 0 index field as VariantArray first
+        let options = GetOptions::new_with_path(VariantPath::from(0));
+        let result = variant_get(&array, options).unwrap();
+        let result_variant = VariantArray::try_new(&result).unwrap();
+        assert_eq!(result_variant.len(), 2);
+
+        // Row 0: expect 0 index = "comedy"
+        assert_eq!(result_variant.value(0), Variant::from("comedy"));
+        // Row 1: expect 0 index = "horror"
+        assert_eq!(result_variant.value(1), Variant::from("horror"));
+    }
+
+    /// Test extracting shredded list field with type conversion.
+    #[test]
+    fn test_shredded_list_as_string() {
+        let array = shredded_list_variant_array();
+        // Test: Extract the 0 index values as StringArray (type conversion)
+        let field = Field::new("typed_value", DataType::Utf8, false);
+        let options = GetOptions::new_with_path(VariantPath::from(0))
+            .with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&array, options).unwrap();
+        // Should get StringArray
+        let expected: ArrayRef = 
Arc::new(StringArray::from(vec![Some("comedy"), Some("horror")]));
+        assert_eq!(&result, &expected);
+    }
+
+    #[test]
+    fn test_shredded_list_index_access_from_value_field() {
+        let array = shredded_list_variant_array();
+        // Index 1 maps to "drama" for row 0, and to fallback value 123 for 
row 1.
+        let options = GetOptions::new_with_path(VariantPath::from(1));
+        let result = variant_get(&array, options).unwrap();
+        let result_variant = VariantArray::try_new(&result).unwrap();
+
+        assert_eq!(result_variant.value(0), Variant::from("drama"));
+        assert_eq!(result_variant.value(1).as_int64(), Some(123));
+    }
+
+    #[test]
+    fn test_shredded_list_index_access_from_value_field_as_int64() {
+        let array = shredded_list_variant_array();
+        let field = Field::new("typed_value", DataType::Int64, true);
+        let options = GetOptions::new_with_path(VariantPath::from(1))
+            .with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&array, options).unwrap();
+
+        // "drama" -> NULL, 123 -> 123.
+        let expected: ArrayRef = Arc::new(Int64Array::from(vec![None, 
Some(123)]));
+        assert_eq!(&result, &expected);
+    }
+
+    // The tests below are temp before: 
https://github.com/apache/arrow-rs/issues/9455
+    #[test]
+    fn test_shredded_list_index_out_of_bounds_unsafe_cast_errors() {
+        let options =
+            
GetOptions::new_with_path(VariantPath::from(10)).with_cast_options(CastOptions {
+                safe: false,
+                ..Default::default()
+            });
+
+        let err = variant_get(&shredded_list_variant_array(), 
options.clone()).unwrap_err();
+        assert!(err.to_string().contains("Cannot access index '10'"));
+    }
+
+    #[test]
+    fn test_large_list_index_path_helper() {
+        let large_list = large_list_of_shredded_elements();
+        let state = take_list_index_as_shredding_state(&large_list, 1, 
&CastOptions::default())
+            .unwrap()
+            .unwrap();
+
+        let typed = state
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        let value = state.value_field().unwrap();
+        assert_eq!(typed.value(0), "drama");
+        assert!(typed.is_null(1));
+        assert!(value.is_null(0));
+        assert!(value.is_valid(1));
+    }
+
+    #[test]
+    fn test_large_list_index_out_of_bounds_unsafe_cast_errors() {
+        let large_list = large_list_of_shredded_elements();
+        let err = take_list_index_as_shredding_state(
+            &large_list,
+            10,
+            &CastOptions {
+                safe: false,
+                ..Default::default()
+            },
+        )
+        .unwrap_err();
+        assert!(err.to_string().contains("Cannot access index '10'"));
+    }
+
+    #[test]
+    fn test_shredded_list_in_struct_index_access() {
+        let array = shredded_struct_with_list_variant_array();
+        let options = 
GetOptions::new_with_path(VariantPath::try_from("a").unwrap().join(1));

Review Comment:
   nit, now, this can also be `VariantPath::try_from("a[1]").unwrap()`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to