klion26 commented on code in PR #9862:
URL: https://github.com/apache/arrow-rs/pull/9862#discussion_r3317553331


##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -244,44 +245,135 @@ fn shredded_get_path(
     shred_basic_variant(target, VariantPath::default(), Some(as_field))
 }
 
-fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> 
Option<ArrayRef> {
+fn try_perfect_shredding(
+    variant_array: &VariantArray,
+    as_field: &Field,
+    cast_options: &CastOptions,
+) -> Result<Option<ArrayRef>> {
     // Try to return the typed value directly when we have a perfect shredding 
match.
     if matches!(as_field.data_type(), DataType::Struct(_)) {
-        return None;
+        return Ok(None);
     }
-    let typed_value = variant_array.typed_value_field()?;
+    let Some(typed_value) = variant_array.typed_value_field() else {
+        return Ok(None);
+    };
 
-    if typed_value.data_type() == as_field.data_type()
-        && variant_array
-            .value_field()
-            .is_none_or(|v| v.null_count() == v.len())
+    if variant_array
+        .value_field()
+        .is_some_and(|v| v.null_count() != v.len())
     {
-        // Here we need to gate against the case where the `typed_value` is 
null but data is in the `value` column.
-        // 1. If the `value` column is null, or
-        // 2. If every row in the `value` column is null
-
-        // This is a perfect shredding, where the value is entirely shredded 
out,
-        // so we can just return the typed value after merging the accumulated 
nulls.
-        let parent_nulls = variant_array.nulls();
-
-        // If we have no nulls OR the shredded array is `Null`, which doesn't 
support external nulls.
-        let target_array = if parent_nulls.is_none() || 
typed_value.data_type().is_null() {
-            typed_value.clone()
-        } else {
-            let merged_nulls = NullBuffer::union(parent_nulls, 
typed_value.nulls());
-            let data = typed_value
-                .to_data()
-                .into_builder()
-                .nulls(merged_nulls)
-                .build()
-                .ok()?;
-            make_array(data)
-        };
+        // We can only use the perfect-shredding fast path when the value is 
entirely shredded out.
+        return Ok(None);
+    }
+
+    // Here we need to gate against the case where the `typed_value` is null 
but data is in the
+    // `value` column:
+    // 1. If the `value` column is null, or
+    // 2. If every row in the `value` column is null
+
+    // This is a perfect shredding, where the value is entirely shredded out, 
so we can reuse the
+    // typed value after merging the accumulated nulls from the traversed 
parent fields.
+    let parent_nulls = variant_array.nulls();
+
+    // If we have no nulls OR the shredded array is `Null`, which doesn't 
support external nulls.
+    let target_array = if parent_nulls.is_none() || 
typed_value.data_type().is_null() {
+        typed_value.clone()
+    } else {
+        let merged_nulls = NullBuffer::union(parent_nulls, 
typed_value.nulls());
+        let data = typed_value
+            .to_data()
+            .into_builder()
+            .nulls(merged_nulls)
+            .build()?;
+        make_array(data)
+    };
+
+    if target_array.data_type() == as_field.data_type() {
+        return Ok(Some(target_array));
+    }
+
+    if !can_use_perfect_shredding_arrow_cast(target_array.data_type(), 
as_field.data_type()) {
+        return Ok(None);
+    }
+
+    // Use Arrow's vectorized cast when it cleanly matches the shredded 
representation. If not,
+    // fall back to row-wise extraction to preserve the existing 
variant-specific semantics.
+    Ok(cast_with_options(target_array.as_ref(), as_field.data_type(), 
cast_options).ok())
+}
 
-        return Some(target_array);
+fn can_use_perfect_shredding_arrow_cast(from_type: &DataType, to_type: 
&DataType) -> bool {
+    use DataType::*;
+
+    if !can_cast_types(from_type, to_type) {
+        return false;
     }
 
-    None
+    match from_type {

Review Comment:
   As we will align the logic between variant and arrow-cast, does this mean we 
can only keep the logic `can_cast_types` above? If `can_cast_types` returns 
tru,e we'll always try to do the cast



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -244,44 +245,135 @@ fn shredded_get_path(
     shred_basic_variant(target, VariantPath::default(), Some(as_field))
 }
 
-fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> 
Option<ArrayRef> {
+fn try_perfect_shredding(
+    variant_array: &VariantArray,
+    as_field: &Field,
+    cast_options: &CastOptions,
+) -> Result<Option<ArrayRef>> {

Review Comment:
   Here we still keep the `Result`, 1) seems we can replace with 
`Option<ArrayRef>` if we don't change the code in L284; 2) the code here 
changed the logic because if we throw an error here, then in L284 we will 
return directly(a minimal 
[rust-play](https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=f3b8abaed291340347bc39fa5e0c5d5d))
 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to