This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 15b18c15fc arrow-cast: Bring back in-order field casting for `StructArray` (#9007)
15b18c15fc is described below

commit 15b18c15fc7de21b1de254809e7f42484bad8e17
Author: Frederic Branczyk <[email protected]>
AuthorDate: Thu Dec 18 21:56:32 2025 +0100

    arrow-cast: Bring back in-order field casting for `StructArray` (#9007)
    
    # Which issue does this PR close?
    
    Closes https://github.com/apache/arrow-rs/issues/9005
    
    # Rationale for this change
    
    A patch release should not break behavior that previously worked, so
    in-order struct casting is restored.
    
    # What changes are included in this PR?
    
    Bring back in-order (positional) casting for structs that have the same
    number of fields.
    
    # Are these changes tested?
    
    Yes. The tests that were modified in
    https://github.com/apache/arrow-rs/pull/8871 have been reverted to their
    previous form.
    
    # Are there any user-facing changes?
    
    Yes. Struct-to-struct casts whose fields cannot all be matched by name
    are cast by position again, as they were before.
---
 arrow-cast/src/cast/mod.rs | 147 ++++++++++++++++++++++++++++-----------------
 1 file changed, 93 insertions(+), 54 deletions(-)

diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 9a0a7997d3..ac6795a3c6 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -254,7 +254,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: 
&DataType) -> bool {
             }
 
             // slow path, we match the fields by name
-            to_fields.iter().all(|to_field| {
+            if to_fields.iter().all(|to_field| {
                 from_fields
                     .iter()
                     .find(|from_field| from_field.name() == to_field.name())
@@ -263,7 +263,15 @@ pub fn can_cast_types(from_type: &DataType, to_type: 
&DataType) -> bool {
                         // cast kernel will return error.
                         can_cast_types(from_field.data_type(), 
to_field.data_type())
                     })
-            })
+            }) {
+                return true;
+            }
+
+            // if we couldn't match by name, we try to see if they can be 
matched by position
+            from_fields
+                .iter()
+                .zip(to_fields.iter())
+                .all(|(f1, f2)| can_cast_types(f1.data_type(), f2.data_type()))
         }
         (Struct(_), _) => false,
         (_, Struct(_)) => false,
@@ -1218,49 +1226,12 @@ pub fn cast_with_options(
                 cast_options,
             )
         }
-        (Struct(from_fields), Struct(to_fields)) => {
-            let array = array.as_struct();
-
-            // Fast path: if field names are in the same order, we can just 
zip and cast
-            let fields_match_order = from_fields.len() == to_fields.len()
-                && from_fields
-                    .iter()
-                    .zip(to_fields.iter())
-                    .all(|(f1, f2)| f1.name() == f2.name());
-
-            let fields = if fields_match_order {
-                // Fast path: cast columns in order
-                array
-                    .columns()
-                    .iter()
-                    .zip(to_fields.iter())
-                    .map(|(column, field)| {
-                        cast_with_options(column, field.data_type(), 
cast_options)
-                    })
-                    .collect::<Result<Vec<ArrayRef>, ArrowError>>()?
-            } else {
-                // Slow path: match fields by name and reorder
-                to_fields
-                    .iter()
-                    .map(|to_field| {
-                        let from_field_idx = from_fields
-                            .iter()
-                            .position(|from_field| from_field.name() == 
to_field.name())
-                            .ok_or_else(|| {
-                                ArrowError::CastError(format!(
-                                    "Field '{}' not found in source struct",
-                                    to_field.name()
-                                ))
-                            })?;
-                        let column = array.column(from_field_idx);
-                        cast_with_options(column, to_field.data_type(), 
cast_options)
-                    })
-                    .collect::<Result<Vec<ArrayRef>, ArrowError>>()?
-            };
-
-            let array = StructArray::try_new(to_fields.clone(), fields, 
array.nulls().cloned())?;
-            Ok(Arc::new(array) as ArrayRef)
-        }
+        (Struct(from_fields), Struct(to_fields)) => cast_struct_to_struct(
+            array.as_struct(),
+            from_fields.clone(),
+            to_fields.clone(),
+            cast_options,
+        ),
         (Struct(_), _) => Err(ArrowError::CastError(format!(
             "Casting from {from_type} to {to_type} not supported"
         ))),
@@ -2292,6 +2263,74 @@ pub fn cast_with_options(
     }
 }
 
+fn cast_struct_to_struct(
+    array: &StructArray,
+    from_fields: Fields,
+    to_fields: Fields,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    // Fast path: if field names are in the same order, we can just zip and 
cast
+    let fields_match_order = from_fields.len() == to_fields.len()
+        && from_fields
+            .iter()
+            .zip(to_fields.iter())
+            .all(|(f1, f2)| f1.name() == f2.name());
+
+    let fields = if fields_match_order {
+        // Fast path: cast columns in order if their names match
+        cast_struct_fields_in_order(array, to_fields.clone(), cast_options)?
+    } else {
+        let all_fields_match_by_name = to_fields.iter().all(|to_field| {
+            from_fields
+                .iter()
+                .any(|from_field| from_field.name() == to_field.name())
+        });
+
+        if all_fields_match_by_name {
+            // Slow path: match fields by name and reorder
+            cast_struct_fields_by_name(array, from_fields.clone(), 
to_fields.clone(), cast_options)?
+        } else {
+            // Fallback: cast field by field in order
+            cast_struct_fields_in_order(array, to_fields.clone(), 
cast_options)?
+        }
+    };
+
+    let array = StructArray::try_new(to_fields.clone(), fields, 
array.nulls().cloned())?;
+    Ok(Arc::new(array) as ArrayRef)
+}
+
+fn cast_struct_fields_by_name(
+    array: &StructArray,
+    from_fields: Fields,
+    to_fields: Fields,
+    cast_options: &CastOptions,
+) -> Result<Vec<ArrayRef>, ArrowError> {
+    to_fields
+        .iter()
+        .map(|to_field| {
+            let from_field_idx = from_fields
+                .iter()
+                .position(|from_field| from_field.name() == to_field.name())
+                .unwrap(); // safe because we checked above
+            let column = array.column(from_field_idx);
+            cast_with_options(column, to_field.data_type(), cast_options)
+        })
+        .collect::<Result<Vec<ArrayRef>, ArrowError>>()
+}
+
+fn cast_struct_fields_in_order(
+    array: &StructArray,
+    to_fields: Fields,
+    cast_options: &CastOptions,
+) -> Result<Vec<ArrayRef>, ArrowError> {
+    array
+        .columns()
+        .iter()
+        .zip(to_fields.iter())
+        .map(|(l, field)| cast_with_options(l, field.data_type(), 
cast_options))
+        .collect::<Result<Vec<ArrayRef>, ArrowError>>()
+}
+
 fn cast_from_decimal<D, F>(
     array: &dyn Array,
     base: D::Native,
@@ -10917,11 +10956,11 @@ mod tests {
         let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
         let struct_array = StructArray::from(vec![
             (
-                Arc::new(Field::new("a", DataType::Boolean, false)),
+                Arc::new(Field::new("b", DataType::Boolean, false)),
                 boolean.clone() as ArrayRef,
             ),
             (
-                Arc::new(Field::new("b", DataType::Int32, false)),
+                Arc::new(Field::new("c", DataType::Int32, false)),
                 int.clone() as ArrayRef,
             ),
         ]);
@@ -10965,11 +11004,11 @@ mod tests {
         let int = Arc::new(Int32Array::from(vec![Some(42), None, Some(19), 
None]));
         let struct_array = StructArray::from(vec![
             (
-                Arc::new(Field::new("a", DataType::Boolean, false)),
+                Arc::new(Field::new("b", DataType::Boolean, false)),
                 boolean.clone() as ArrayRef,
             ),
             (
-                Arc::new(Field::new("b", DataType::Int32, true)),
+                Arc::new(Field::new("c", DataType::Int32, true)),
                 int.clone() as ArrayRef,
             ),
         ]);
@@ -10999,11 +11038,11 @@ mod tests {
         let int = Arc::new(Int32Array::from(vec![i32::MAX, 25, 1, 100]));
         let struct_array = StructArray::from(vec![
             (
-                Arc::new(Field::new("a", DataType::Boolean, false)),
+                Arc::new(Field::new("b", DataType::Boolean, false)),
                 boolean.clone() as ArrayRef,
             ),
             (
-                Arc::new(Field::new("b", DataType::Int32, false)),
+                Arc::new(Field::new("c", DataType::Int32, false)),
                 int.clone() as ArrayRef,
             ),
         ]);
@@ -11139,7 +11178,7 @@ mod tests {
         assert!(result.is_err());
         assert_eq!(
             result.unwrap_err().to_string(),
-            "Cast error: Field 'b' not found in source struct"
+            "Invalid argument error: Incorrect number of arrays for 
StructArray fields, expected 2 got 1"
         );
     }
 
@@ -11196,7 +11235,7 @@ mod tests {
     }
 
     #[test]
-    fn test_can_cast_struct_with_missing_field() {
+    fn test_can_cast_struct_rename_field() {
         // Test that can_cast_types returns false when target has a field not 
in source
         let from_type = DataType::Struct(
             vec![
@@ -11214,7 +11253,7 @@ mod tests {
             .into(),
         );
 
-        assert!(!can_cast_types(&from_type, &to_type));
+        assert!(can_cast_types(&from_type, &to_type));
     }
 
     fn run_decimal_cast_test_case_between_multiple_types(t: 
DecimalCastTestConfig) {

Reply via email to