alamb commented on code in PR #4992:
URL: https://github.com/apache/arrow-datafusion/pull/4992#discussion_r1082367358


##########
datafusion/common/src/error.rs:
##########
@@ -359,84 +380,49 @@ impl DataFusionError {
     ///
     /// This may be the same as `self`.
     pub fn find_root(&self) -> &Self {
-        // Note: This is a non-recursive algorithm so we do not run out of 
stack space, even for long error chains. The
-        //       algorithm will always terminate because all steps access the 
next error through "converging" ownership,
-        //       i.e. there can be a fan-in by multiple parents (e.g. via 
`Arc`), but never a fan-out by multiple
-        //       children (e.g. via `Weak` or interior mutability via `Mutex`).
-
-        // last error in the chain that was a DataFusionError
-        let mut checkpoint: &Self = self;
-
-        // current non-DataFusion error
-        let mut other_e: Option<OtherErr<'_>> = None;
-
-        loop {
-            // do we have another error type to explore?
-            if let Some(inner) = other_e {
-                // `other_e` is now bound to `inner`, so we can clear this path
-                other_e = None;
-
-                match inner {
-                    OtherErr::Arrow(inner) => {
-                        if let ArrowError::ExternalError(inner) = inner {
-                            other_e = Some(OtherErr::Dyn(inner.as_ref()));
-                            continue;
-                        }
-                    }
-                    OtherErr::Dyn(inner) => {
-                        if let Some(inner) = inner.downcast_ref::<Self>() {
-                            checkpoint = inner;
-                            continue;
-                        }
-
-                        if let Some(inner) = 
inner.downcast_ref::<ArrowError>() {
-                            other_e = Some(OtherErr::Arrow(inner));
-                            continue;
-                        }
-
-                        // some errors are wrapped into `Arc`s to share them 
with multiple receivers
-                        if let Some(inner) = inner.downcast_ref::<Arc<Self>>() 
{
-                            checkpoint = inner.as_ref();
-                            continue;
-                        }
-
-                        if let Some(inner) = 
inner.downcast_ref::<Arc<ArrowError>>() {
-                            other_e = Some(OtherErr::Arrow(inner.as_ref()));
-                            continue;
-                        }
-                    }
-                }
-
-                // dead end?
-                break;
-            }
-
-            // traverse context chain
-            if let Self::Context(_msg, inner) = checkpoint {
-                checkpoint = inner;
-                continue;
+        // Note: This is a non-recursive algorithm so we do not run
+        // out of stack space, even for long error chains.
+
+        let mut last_datafusion_error = self;
+        let mut root_error: &dyn Error = self;
+        while let Some(source) = find_source(root_error) {
+            // walk the next level
+            root_error = source;
+            // remember the lowest datafusion error so far
+            if let Some(e) = root_error.downcast_ref::<DataFusionError>() {
+                last_datafusion_error = e;
             }
-
-            // The Arrow error may itself contain a datafusion error again
-            // See https://github.com/apache/arrow-datafusion/issues/4172
-            if let Self::ArrowError(inner) = checkpoint {
-                other_e = Some(OtherErr::Arrow(inner));
-                continue;
-            }
-
-            // also try to introspect direct external errors
-            if let Self::External(inner) = checkpoint {
-                other_e = Some(OtherErr::Dyn(inner.as_ref()));
-                continue;
-            }
-
-            // no more traversal
-            break;
         }
-
         // return last checkpoint (which may be the original error)
-        checkpoint
+        last_datafusion_error
+    }
+}
+
+fn find_source<'a>(e: &'a (dyn Error + 'static)) -> Option<&'a (dyn Error + 
'static)> {
+    // workaround until https://github.com/apache/arrow-rs/issues/3566 is 
released
+    if let Some(e) = e.downcast_ref::<ArrowError>() {
+        return if let ArrowError::ExternalError(e) = e {
+            Some(e.as_ref())
+        } else {
+            None
+        };
     }
+    // some errors are wrapped into `Arc`s to share them with multiple

Review Comment:
   It is necessary, but I don't know why. The tests fail without it. If you 
could help figure out how to avoid it it would be great. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to