sdf-jkl commented on code in PR #9663:
URL: https://github.com/apache/arrow-rs/pull/9663#discussion_r3074052436
##########
parquet-variant-compute/src/shred_variant.rs:
##########
@@ -68,6 +68,14 @@ use std::sync::Arc;
/// See [`ShreddedSchemaBuilder`] for a convenient way to build the `as_type`
/// value passed to this function.
pub fn shred_variant(array: &VariantArray, as_type: &DataType) ->
Result<VariantArray> {
+ shred_variant_with_options(array, as_type, CastOptions::default())
+}
+
+pub fn shred_variant_with_options(
+ array: &VariantArray,
+ as_type: &DataType,
+ cast_options: CastOptions,
Review Comment:
I think it's a nice addition!
We could borrow `CastOptions` to follow other `_with_options` APIs in this
crate.
```suggestion
cast_options: &CastOptions,
```
##########
parquet-variant-compute/src/shred_variant.rs:
##########
@@ -321,12 +328,15 @@ impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> {
// If the variant is not an array, typed_value must be null.
Review Comment:
Should add a comment for the new case where cast fails and we keep the value.
##########
parquet-variant-compute/src/shred_variant.rs:
##########
@@ -1676,13 +1686,37 @@ mod tests {
])]);
let list_schema =
DataType::FixedSizeList(Arc::new(Field::new("item",
DataType::Int64, true)), 2);
- let err = shred_variant(&input, &list_schema).unwrap_err();
- println!("{}", err);
+
+ let result = shred_variant_with_options(
+ &input,
+ &list_schema,
+ CastOptions {
+ safe: true,
+ ..Default::default()
+ },
+ )
+ .unwrap();
+ assert_eq!(result.len(), 1);
+
+ // With `safe` cast seto to true, the incorrect size should not raise
error.
+ assert!(result.is_valid(0));
+ assert!(result.value_field().unwrap().is_valid(0));
+ assert!(result.typed_value_field().unwrap().is_null(0));
+
+ // With `safe` cast set to false, the incorrect size should raise
error.
Review Comment:
same nit as above
```suggestion
// With `safe` set to false, the incorrect size should raise error.
```
##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -919,6 +920,13 @@ enum ListElementBuilder<'a> {
}
impl<'a> ListElementBuilder<'a> {
+ fn append_null(&mut self) -> Result<()> {
Review Comment:
Maybe add a comment mentioning this is only used for FixedSizeList builder?
##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -4320,61 +4331,30 @@ mod test {
safe: true,
..Default::default()
});
-
- // Verify that the shredded value is a `FixedSizeList`.
let result = variant_get(&variant_array, options).unwrap();
let fixed_size_list = result
.as_any()
.downcast_ref::<FixedSizeListArray>()
.expect("Expected FixedSizeListArray");
- assert_eq!(fixed_size_list.len(), 3);
- assert_eq!(fixed_size_list.value_length(), 2);
-
- // Verify that the first entry in the `FixedSizeList` contains the
expected value.
- assert!(fixed_size_list.is_valid(0));
- let val0 = fixed_size_list.value(0);
- let val0_struct = val0.as_any().downcast_ref::<StructArray>().unwrap();
- let val0_typed = val0_struct.column_by_name("typed_value").unwrap();
- let val0_ints =
val0_typed.as_any().downcast_ref::<Int64Array>().unwrap();
- assert_eq!(val0_ints.values(), &[1i64, 2i64]);
-
- // Verify that the second entry in the `FixedSizeList` contains the
expected value.
- assert!(fixed_size_list.is_valid(1));
- let val1 = fixed_size_list.value(1);
- let val1_struct = val1.as_any().downcast_ref::<StructArray>().unwrap();
- let val1_typed = val1_struct.column_by_name("typed_value").unwrap();
- let val1_ints =
val1_typed.as_any().downcast_ref::<Int64Array>().unwrap();
- assert_eq!(val1_ints.values(), &[3i64, 4i64]);
-
- // Verify that the third entry is null due to type mismatch.
- assert!(fixed_size_list.is_null(2));
- }
+ assert_eq!(fixed_size_list.len(), 1);
+ assert!(fixed_size_list.is_null(0));
- #[test]
- fn test_variant_get_fixed_size_list_wrong_size() {
- let string_array: ArrayRef = Arc::new(StringArray::from(vec!["[1, 2,
3]"]));
- let variant_array =
ArrayRef::from(json_to_variant(&string_array).unwrap());
- let item_field = Arc::new(Field::new("item", Int64, true));
-
- // Set the safe flag to both true and false and verify that size
mismatch raises an error
- // for `FixedSizeList`, regardless.
- for safe in [true, false] {
- let options = GetOptions::new()
- .with_as_type(Some(FieldRef::from(Field::new(
- "result",
- DataType::FixedSizeList(item_field.clone(), 2),
- true,
- ))))
- .with_cast_options(CastOptions {
- safe,
- ..Default::default()
- });
- let err = variant_get(&variant_array, options).unwrap_err();
- assert!(
- err.to_string()
- .contains("Expected fixed size list of size 2, got size
3"),
- "safe={safe}, got: {err}",
- );
- }
+ // With `safe` option set to false, error should be raise on wrong
sized fixed list.
Review Comment:
nit:
```suggestion
// With `safe` set to false, error should be raised on wrong sized
fixed list.
```
##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -4299,17 +4315,12 @@ mod test {
}
#[test]
- fn test_variant_get_fixed_size_list_with_safe_option() {
- let string_array: ArrayRef = Arc::new(StringArray::from(vec![
- "[1, 2]",
- "[3, 4]",
- "\"not a list\"",
- ]));
+ fn test_variant_get_fixed_size_list_wrong_size() {
+ let string_array: ArrayRef = Arc::new(StringArray::from(vec!["[1, 2,
3]"]));
let variant_array =
ArrayRef::from(json_to_variant(&string_array).unwrap());
let item_field = Arc::new(Field::new("item", Int64, true));
- // Request shredding on `FixedSizeList` with `safe` set to true, such
that `variant_get`
- // does not raise error on type mismatch.
+ // With `safe` set to true, size mismatch should return in null.
Review Comment:
nit
```suggestion
// With `safe` set to true, size mismatch should return Null.
```
##########
parquet-variant-compute/src/shred_variant.rs:
##########
@@ -1676,13 +1686,37 @@ mod tests {
])]);
let list_schema =
DataType::FixedSizeList(Arc::new(Field::new("item",
DataType::Int64, true)), 2);
- let err = shred_variant(&input, &list_schema).unwrap_err();
- println!("{}", err);
+
+ let result = shred_variant_with_options(
+ &input,
+ &list_schema,
+ CastOptions {
+ safe: true,
+ ..Default::default()
+ },
+ )
+ .unwrap();
+ assert_eq!(result.len(), 1);
+
+ // With `safe` cast seto to true, the incorrect size should not raise
error.
Review Comment:
nit: remove cast for consistency with other comments
```suggestion
// With `safe` set to to true, the incorrect size should not raise
error.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]