klion26 commented on code in PR #9862:
URL: https://github.com/apache/arrow-rs/pull/9862#discussion_r3297834429


##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -244,44 +245,135 @@ fn shredded_get_path(
     shred_basic_variant(target, VariantPath::default(), Some(as_field))
 }
 
-fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> 
Option<ArrayRef> {
+fn try_perfect_shredding(
+    variant_array: &VariantArray,
+    as_field: &Field,
+    cast_options: &CastOptions,
+) -> Result<Option<ArrayRef>> {
     // Try to return the typed value directly when we have a perfect shredding 
match.
     if matches!(as_field.data_type(), DataType::Struct(_)) {
-        return None;
+        return Ok(None);
     }
-    let typed_value = variant_array.typed_value_field()?;
+    let Some(typed_value) = variant_array.typed_value_field() else {
+        return Ok(None);
+    };
 
-    if typed_value.data_type() == as_field.data_type()
-        && variant_array
-            .value_field()
-            .is_none_or(|v| v.null_count() == v.len())
+    if variant_array
+        .value_field()
+        .is_some_and(|v| v.null_count() != v.len())
     {
-        // Here we need to gate against the case where the `typed_value` is 
null but data is in the `value` column.
-        // 1. If the `value` column is null, or
-        // 2. If every row in the `value` column is null
-
-        // This is a perfect shredding, where the value is entirely shredded 
out,
-        // so we can just return the typed value after merging the accumulated 
nulls.
-        let parent_nulls = variant_array.nulls();
-
-        // If we have no nulls OR the shredded array is `Null`, which doesn't 
support external nulls.
-        let target_array = if parent_nulls.is_none() || 
typed_value.data_type().is_null() {
-            typed_value.clone()
-        } else {
-            let merged_nulls = NullBuffer::union(parent_nulls, 
typed_value.nulls());
-            let data = typed_value
-                .to_data()
-                .into_builder()
-                .nulls(merged_nulls)
-                .build()
-                .ok()?;
-            make_array(data)
-        };
+        // We can only use the perfect-shredding fast path when the value is 
entirely shredded out.
+        return Ok(None);
+    }
+
+    // Here we need to gate against the case where the `typed_value` is null 
but data is in the
+    // `value` column:
+    // 1. If the `value` column is null, or
+    // 2. If every row in the `value` column is null
+
+    // This is a perfect shredding, where the value is entirely shredded out, 
so we can reuse the
+    // typed value after merging the accumulated nulls from the traversed 
parent fields.
+    let parent_nulls = variant_array.nulls();
+
+    // If we have no nulls OR the shredded array is `Null`, which doesn't 
support external nulls.
+    let target_array = if parent_nulls.is_none() || 
typed_value.data_type().is_null() {
+        typed_value.clone()
+    } else {
+        let merged_nulls = NullBuffer::union(parent_nulls, 
typed_value.nulls());
+        let data = typed_value
+            .to_data()
+            .into_builder()
+            .nulls(merged_nulls)
+            .build()?;
+        make_array(data)
+    };
+
+    if target_array.data_type() == as_field.data_type() {
+        return Ok(Some(target_array));
+    }
+
+    if !can_use_perfect_shredding_arrow_cast(target_array.data_type(), 
as_field.data_type()) {
+        return Ok(None);
+    }
+
+    // Use Arrow's vectorized cast when it cleanly matches the shredded 
representation. If not,
+    // fall back to row-wise extraction to preserve the existing 
variant-specific semantics.
+    Ok(cast_with_options(target_array.as_ref(), as_field.data_type(), 
cast_options).ok())
+}
 
-        return Some(target_array);
+fn can_use_perfect_shredding_arrow_cast(from_type: &DataType, to_type: 
&DataType) -> bool {
+    use DataType::*;
+
+    if !can_cast_types(from_type, to_type) {
+        return false;
     }
 
-    None
+    match from_type {

Review Comment:
   Seems this overlaps with `can_cast_types`? can we simplify this



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -4183,6 +4298,23 @@ mod test {
         }
     }
 
+    #[test]
+    fn test_perfect_shredding_list_cast_gate_uses_variant_element_semantics() {
+        let int64_item = Arc::new(Field::new("item", DataType::Int64, true));

Review Comment:
   Do we need to add more tests to cover the logic of 
`can_use_perfect_shredding_arrow_cast` here?



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -244,44 +245,135 @@ fn shredded_get_path(
     shred_basic_variant(target, VariantPath::default(), Some(as_field))
 }
 
-fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> 
Option<ArrayRef> {
+fn try_perfect_shredding(
+    variant_array: &VariantArray,
+    as_field: &Field,
+    cast_options: &CastOptions,
+) -> Result<Option<ArrayRef>> {

Review Comment:
   Why do we need the `Result` in the return value? Is it ok to use 
`Option<arrayRef>`  here?



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -244,44 +245,135 @@ fn shredded_get_path(
     shred_basic_variant(target, VariantPath::default(), Some(as_field))
 }
 
-fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> 
Option<ArrayRef> {
+fn try_perfect_shredding(
+    variant_array: &VariantArray,
+    as_field: &Field,
+    cast_options: &CastOptions,
+) -> Result<Option<ArrayRef>> {
     // Try to return the typed value directly when we have a perfect shredding 
match.
     if matches!(as_field.data_type(), DataType::Struct(_)) {
-        return None;
+        return Ok(None);
     }
-    let typed_value = variant_array.typed_value_field()?;
+    let Some(typed_value) = variant_array.typed_value_field() else {
+        return Ok(None);
+    };
 
-    if typed_value.data_type() == as_field.data_type()
-        && variant_array
-            .value_field()
-            .is_none_or(|v| v.null_count() == v.len())
+    if variant_array
+        .value_field()
+        .is_some_and(|v| v.null_count() != v.len())
     {
-        // Here we need to gate against the case where the `typed_value` is 
null but data is in the `value` column.
-        // 1. If the `value` column is null, or
-        // 2. If every row in the `value` column is null
-
-        // This is a perfect shredding, where the value is entirely shredded 
out,
-        // so we can just return the typed value after merging the accumulated 
nulls.
-        let parent_nulls = variant_array.nulls();
-
-        // If we have no nulls OR the shredded array is `Null`, which doesn't 
support external nulls.
-        let target_array = if parent_nulls.is_none() || 
typed_value.data_type().is_null() {
-            typed_value.clone()
-        } else {
-            let merged_nulls = NullBuffer::union(parent_nulls, 
typed_value.nulls());
-            let data = typed_value
-                .to_data()
-                .into_builder()
-                .nulls(merged_nulls)
-                .build()
-                .ok()?;
-            make_array(data)
-        };
+        // We can only use the perfect-shredding fast path when the value is 
entirely shredded out.
+        return Ok(None);
+    }
+
+    // Here we need to gate against the case where the `typed_value` is null 
but data is in the
+    // `value` column:
+    // 1. If the `value` column is null, or
+    // 2. If every row in the `value` column is null
+
+    // This is a perfect shredding, where the value is entirely shredded out, 
so we can reuse the
+    // typed value after merging the accumulated nulls from the traversed 
parent fields.
+    let parent_nulls = variant_array.nulls();
+
+    // If we have no nulls OR the shredded array is `Null`, which doesn't 
support external nulls.
+    let target_array = if parent_nulls.is_none() || 
typed_value.data_type().is_null() {
+        typed_value.clone()
+    } else {
+        let merged_nulls = NullBuffer::union(parent_nulls, 
typed_value.nulls());
+        let data = typed_value
+            .to_data()
+            .into_builder()
+            .nulls(merged_nulls)
+            .build()?;
+        make_array(data)
+    };
+
+    if target_array.data_type() == as_field.data_type() {
+        return Ok(Some(target_array));
+    }
+
+    if !can_use_perfect_shredding_arrow_cast(target_array.data_type(), 
as_field.data_type()) {
+        return Ok(None);
+    }
+
+    // Use Arrow's vectorized cast when it cleanly matches the shredded 
representation. If not,
+    // fall back to row-wise extraction to preserve the existing 
variant-specific semantics.
+    Ok(cast_with_options(target_array.as_ref(), as_field.data_type(), 
cast_options).ok())

Review Comment:
   Does this mean that `cast_with_options` throws some error if the return 
value here is None? do we need to add some tests to cover this



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to