This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new ec85145f4bf Add `try_filter_leaves` to propagate error from filter closure (#5575)
ec85145f4bf is described below

commit ec85145f4bf486851013faf7f3af9a871e5a9d59
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Wed Apr 3 01:37:34 2024 -0700

    Add `try_filter_leaves` to propagate error from filter closure (#5575)
    
    * Propagate error from filter closure
    
    * Add try_filter_leaves instead
---
 arrow-schema/src/fields.rs | 97 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 78 insertions(+), 19 deletions(-)

diff --git a/arrow-schema/src/fields.rs b/arrow-schema/src/fields.rs
index 59b7e76c782..5a1a6c84c25 100644
--- a/arrow-schema/src/fields.rs
+++ b/arrow-schema/src/fields.rs
@@ -137,10 +137,22 @@ impl Fields {
     /// assert_eq!(filtered, expected);
     /// ```
     pub fn filter_leaves<F: FnMut(usize, &FieldRef) -> bool>(&self, mut 
filter: F) -> Self {
-        fn filter_field<F: FnMut(&FieldRef) -> bool>(
+        self.try_filter_leaves(|idx, field| Ok(filter(idx, field)))
+            .unwrap()
+    }
+
+    /// Returns a copy of this [`Fields`] containing only those [`FieldRef`] 
passing a predicate
+    /// or an error if the predicate fails.
+    ///
+    /// See [`Fields::filter_leaves`] for more information.
+    pub fn try_filter_leaves<F: FnMut(usize, &FieldRef) -> Result<bool, 
ArrowError>>(
+        &self,
+        mut filter: F,
+    ) -> Result<Self, ArrowError> {
+        fn filter_field<F: FnMut(&FieldRef) -> Result<bool, ArrowError>>(
             f: &FieldRef,
             filter: &mut F,
-        ) -> Option<FieldRef> {
+        ) -> Result<Option<FieldRef>, ArrowError> {
             use DataType::*;
 
             let v = match f.data_type() {
@@ -149,35 +161,72 @@ impl Fields {
                 d => d,
             };
             let d = match v {
-                List(child) => List(filter_field(child, filter)?),
-                LargeList(child) => LargeList(filter_field(child, filter)?),
-                Map(child, ordered) => Map(filter_field(child, filter)?, 
*ordered),
-                FixedSizeList(child, size) => 
FixedSizeList(filter_field(child, filter)?, *size),
+                List(child) => {
+                    let fields = filter_field(child, filter)?;
+                    if let Some(fields) = fields {
+                        List(fields)
+                    } else {
+                        return Ok(None);
+                    }
+                }
+                LargeList(child) => {
+                    let fields = filter_field(child, filter)?;
+                    if let Some(fields) = fields {
+                        LargeList(fields)
+                    } else {
+                        return Ok(None);
+                    }
+                }
+                Map(child, ordered) => {
+                    let fields = filter_field(child, filter)?;
+                    if let Some(fields) = fields {
+                        Map(fields, *ordered)
+                    } else {
+                        return Ok(None);
+                    }
+                }
+                FixedSizeList(child, size) => {
+                    let fields = filter_field(child, filter)?;
+                    if let Some(fields) = fields {
+                        FixedSizeList(fields, *size)
+                    } else {
+                        return Ok(None);
+                    }
+                }
                 Struct(fields) => {
-                    let filtered: Fields = fields
+                    let filtered: Result<Vec<_>, _> =
+                        fields.iter().map(|f| filter_field(f, 
filter)).collect();
+                    let filtered: Fields = filtered?
                         .iter()
-                        .filter_map(|f| filter_field(f, filter))
+                        .filter_map(|f| f.as_ref().cloned())
                         .collect();
 
                     if filtered.is_empty() {
-                        return None;
+                        return Ok(None);
                     }
 
                     Struct(filtered)
                 }
                 Union(fields, mode) => {
-                    let filtered: UnionFields = fields
+                    let filtered: Result<Vec<_>, _> = fields
+                        .iter()
+                        .map(|(id, f)| filter_field(f, filter).map(|f| 
f.map(|f| (id, f))))
+                        .collect();
+                    let filtered: UnionFields = filtered?
                         .iter()
-                        .filter_map(|(id, f)| Some((id, filter_field(f, 
filter)?)))
+                        .filter_map(|f| f.as_ref().cloned())
                         .collect();
 
                     if filtered.is_empty() {
-                        return None;
+                        return Ok(None);
                     }
 
                     Union(filtered, *mode)
                 }
-                _ => return filter(f).then(|| f.clone()),
+                _ => {
+                    let filtered = filter(f)?;
+                    return Ok(filtered.then(|| f.clone()));
+                }
             };
             let d = match f.data_type() {
                 Dictionary(k, _) => Dictionary(k.clone(), Box::new(d)),
@@ -186,20 +235,26 @@ impl Fields {
                 }
                 _ => d,
             };
-            Some(Arc::new(f.as_ref().clone().with_data_type(d)))
+            Ok(Some(Arc::new(f.as_ref().clone().with_data_type(d))))
         }
 
         let mut leaf_idx = 0;
         let mut filter = |f: &FieldRef| {
-            let t = filter(leaf_idx, f);
+            let t = filter(leaf_idx, f)?;
             leaf_idx += 1;
-            t
+            Ok(t)
         };
 
-        self.0
+        let filtered: Result<Vec<_>, _> = self
+            .0
             .iter()
-            .filter_map(|f| filter_field(f, &mut filter))
-            .collect()
+            .map(|f| filter_field(f, &mut filter))
+            .collect();
+        let filtered = filtered?
+            .iter()
+            .filter_map(|f| f.as_ref().cloned())
+            .collect();
+        Ok(filtered)
     }
 
     /// Remove a field by index and return it.
@@ -531,5 +586,9 @@ mod tests {
         let r = fields.filter_leaves(|idx, _| idx == 14 || idx == 15);
         assert_eq!(r.len(), 1);
         assert_eq!(r[0], fields[9]);
+
+        // Propagate error
+        let r = fields.try_filter_leaves(|_, _| 
Err(ArrowError::SchemaError("error".to_string())));
+        assert!(r.is_err());
     }
 }

Reply via email to